R Markdown
#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
##
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
##
## nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
##
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
##
## compare
## The following object is masked from 'package:class':
##
## knn
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8 2023-06-11
#install.packages('ggplot2')
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## The following object is masked from 'package:kernlab':
##
## alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%() masks igraph::%--%()
## ✖ ggplot2::alpha() masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine() masks randomForest::combine()
## ✖ purrr::compose() masks igraph::compose()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::margin() masks randomForest::margin()
## ✖ purrr::none() masks locfit::none()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ purrr::simplify() masks igraph::simplify()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
##
## The following objects are masked from 'package:rstanarm':
##
## compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2024 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
##
## Attaching package: 'TDA'
##
## The following object is masked from 'package:cluster':
##
## silhouette
library(TDAstats)
library(ks)
##
## Attaching package: 'ks'
##
## The following object is masked from 'package:TDA':
##
## kde
##
## The following object is masked from 'package:MCMCpack':
##
## vech
##
## The following object is masked from 'package:igraph':
##
## compare
##
## The following object is masked from 'package:BayesFactor':
##
## compare
#install.packages('MLmetrics')
library(MLmetrics)
##
## Attaching package: 'MLmetrics'
##
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
##
## The following object is masked from 'package:base':
##
## Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
#import adult dataset from UCI repository stored on my desktop
#Adult **
# Load the UCI Adult data. The file has no header row, so read.csv() names the
# columns V1..V15 automatically.
# NOTE(review): character fields keep a leading space (e.g. " State-gov");
# consider strip.white = TRUE if downstream code does not rely on it.
adult <- read.csv(
  "~/Desktop/NCU/DissertationDatasets/Adult/adult.data",
  header = FALSE
)
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only added a stray "NULL" to the output; call it directly.
str(adult)
## 'data.frame': 32561 obs. of 15 variables:
## $ V1 : int 39 50 38 53 28 37 49 52 31 42 ...
## $ V2 : chr " State-gov" " Self-emp-not-inc" " Private" " Private" ...
## $ V3 : int 77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
## $ V4 : chr " Bachelors" " Bachelors" " HS-grad" " 11th" ...
## $ V5 : int 13 13 9 7 13 14 5 9 14 13 ...
## $ V6 : chr " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
## $ V7 : chr " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
## $ V8 : chr " Not-in-family" " Husband" " Not-in-family" " Husband" ...
## $ V9 : chr " White" " White" " White" " Black" ...
## $ V10: chr " Male" " Male" " Male" " Male" ...
## $ V11: int 2174 0 0 0 0 0 0 0 14084 5178 ...
## $ V12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V13: int 40 13 40 40 40 40 16 45 50 40 ...
## $ V14: chr " United-States" " United-States" " United-States" " United-States" ...
## $ V15: chr " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
# Per-column summary: min/quartiles/mean/max for the integer columns and
# length/class/mode for the character columns.
summary(adult)
## V1 V2 V3 V4
## Min. :17.00 Length:32561 Min. : 12285 Length:32561
## 1st Qu.:28.00 Class :character 1st Qu.: 117827 Class :character
## Median :37.00 Mode :character Median : 178356 Mode :character
## Mean :38.58 Mean : 189778
## 3rd Qu.:48.00 3rd Qu.: 237051
## Max. :90.00 Max. :1484705
## V5 V6 V7 V8
## Min. : 1.00 Length:32561 Length:32561 Length:32561
## 1st Qu.: 9.00 Class :character Class :character Class :character
## Median :10.00 Mode :character Mode :character Mode :character
## Mean :10.08
## 3rd Qu.:12.00
## Max. :16.00
## V9 V10 V11 V12
## Length:32561 Length:32561 Min. : 0 Min. : 0.0
## Class :character Class :character 1st Qu.: 0 1st Qu.: 0.0
## Mode :character Mode :character Median : 0 Median : 0.0
## Mean : 1078 Mean : 87.3
## 3rd Qu.: 0 3rd Qu.: 0.0
## Max. :99999 Max. :4356.0
## V13 V14 V15
## Min. : 1.00 Length:32561 Length:32561
## 1st Qu.:40.00 Class :character Class :character
## Median :40.00 Mode :character Mode :character
## Mean :40.44
## 3rd Qu.:45.00
## Max. :99.00
#Dry_Bean_Dataset **
library(readxl)
# Read the Dry Bean workbook; read_excel() returns a tibble.
Dry_Bean_Dataset <- read_excel(
  "~/Desktop/NCU/DissertationDatasets/DryBeanDataset/Dry_Bean_Dataset.xlsx"
)
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only added a stray "NULL" to the output; call it directly.
str(Dry_Bean_Dataset)
## tibble [13,611 × 17] (S3: tbl_df/tbl/data.frame)
## $ Area : num [1:13611] 28395 28734 29380 30008 30140 ...
## $ Perimeter : num [1:13611] 610 638 624 646 620 ...
## $ MajorAxisLength: num [1:13611] 208 201 213 211 202 ...
## $ MinorAxisLength: num [1:13611] 174 183 176 183 190 ...
## $ AspectRation : num [1:13611] 1.2 1.1 1.21 1.15 1.06 ...
## $ Eccentricity : num [1:13611] 0.55 0.412 0.563 0.499 0.334 ...
## $ ConvexArea : num [1:13611] 28715 29172 29690 30724 30417 ...
## $ EquivDiameter : num [1:13611] 190 191 193 195 196 ...
## $ Extent : num [1:13611] 0.764 0.784 0.778 0.783 0.773 ...
## $ Solidity : num [1:13611] 0.989 0.985 0.99 0.977 0.991 ...
## $ roundness : num [1:13611] 0.958 0.887 0.948 0.904 0.985 ...
## $ Compactness : num [1:13611] 0.913 0.954 0.909 0.928 0.971 ...
## $ ShapeFactor1 : num [1:13611] 0.00733 0.00698 0.00724 0.00702 0.0067 ...
## $ ShapeFactor2 : num [1:13611] 0.00315 0.00356 0.00305 0.00321 0.00366 ...
## $ ShapeFactor3 : num [1:13611] 0.834 0.91 0.826 0.862 0.942 ...
## $ ShapeFactor4 : num [1:13611] 0.999 0.998 0.999 0.994 0.999 ...
## $ Class : chr [1:13611] "SEKER" "SEKER" "SEKER" "SEKER" ...
## NULL
# Per-column summary: min/quartiles/mean/max for the numeric measurements and
# length/class/mode for the character Class column.
summary(Dry_Bean_Dataset)
## Area Perimeter MajorAxisLength MinorAxisLength
## Min. : 20420 Min. : 524.7 Min. :183.6 Min. :122.5
## 1st Qu.: 36328 1st Qu.: 703.5 1st Qu.:253.3 1st Qu.:175.8
## Median : 44652 Median : 794.9 Median :296.9 Median :192.4
## Mean : 53048 Mean : 855.3 Mean :320.1 Mean :202.3
## 3rd Qu.: 61332 3rd Qu.: 977.2 3rd Qu.:376.5 3rd Qu.:217.0
## Max. :254616 Max. :1985.4 Max. :738.9 Max. :460.2
## AspectRation Eccentricity ConvexArea EquivDiameter
## Min. :1.025 Min. :0.2190 Min. : 20684 Min. :161.2
## 1st Qu.:1.432 1st Qu.:0.7159 1st Qu.: 36714 1st Qu.:215.1
## Median :1.551 Median :0.7644 Median : 45178 Median :238.4
## Mean :1.583 Mean :0.7509 Mean : 53768 Mean :253.1
## 3rd Qu.:1.707 3rd Qu.:0.8105 3rd Qu.: 62294 3rd Qu.:279.4
## Max. :2.430 Max. :0.9114 Max. :263261 Max. :569.4
## Extent Solidity roundness Compactness
## Min. :0.5553 Min. :0.9192 Min. :0.4896 Min. :0.6406
## 1st Qu.:0.7186 1st Qu.:0.9857 1st Qu.:0.8321 1st Qu.:0.7625
## Median :0.7599 Median :0.9883 Median :0.8832 Median :0.8013
## Mean :0.7497 Mean :0.9871 Mean :0.8733 Mean :0.7999
## 3rd Qu.:0.7869 3rd Qu.:0.9900 3rd Qu.:0.9169 3rd Qu.:0.8343
## Max. :0.8662 Max. :0.9947 Max. :0.9907 Max. :0.9873
## ShapeFactor1 ShapeFactor2 ShapeFactor3 ShapeFactor4
## Min. :0.002778 Min. :0.0005642 Min. :0.4103 Min. :0.9477
## 1st Qu.:0.005900 1st Qu.:0.0011535 1st Qu.:0.5814 1st Qu.:0.9937
## Median :0.006645 Median :0.0016935 Median :0.6420 Median :0.9964
## Mean :0.006564 Mean :0.0017159 Mean :0.6436 Mean :0.9951
## 3rd Qu.:0.007271 3rd Qu.:0.0021703 3rd Qu.:0.6960 3rd Qu.:0.9979
## Max. :0.010451 Max. :0.0036650 Max. :0.9748 Max. :0.9997
## Class
## Length:13611
## Class :character
## Mode :character
##
##
##
#Poker Hand **
# Load the Poker Hand training file. It has no header row, so read.csv() names
# the columns V1..V11 automatically.
poker.hand.training.true <- read.csv(
  "~/Downloads/poker+hand/poker-hand-training-true.data",
  header = FALSE
)
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only added a stray "NULL" to the output; call it directly.
str(poker.hand.training.true)
## 'data.frame': 25010 obs. of 11 variables:
## $ V1 : int 1 2 3 4 4 1 1 2 3 4 ...
## $ V2 : int 10 11 12 10 1 2 9 1 5 1 ...
## $ V3 : int 1 2 3 4 4 1 1 2 3 4 ...
## $ V4 : int 11 13 11 11 13 4 12 2 6 4 ...
## $ V5 : int 1 2 3 4 4 1 1 2 3 4 ...
## $ V6 : int 13 10 13 1 12 5 10 3 9 2 ...
## $ V7 : int 1 2 3 4 4 1 1 2 3 4 ...
## $ V8 : int 12 12 10 13 11 3 11 4 7 3 ...
## $ V9 : int 1 2 3 4 4 1 1 2 3 4 ...
## $ V10: int 1 1 1 12 10 6 13 5 8 5 ...
## $ V11: int 9 9 9 9 9 8 8 8 8 8 ...
## NULL
#Diabetes 130-US hospitals (1999-2008)
# Load the diabetes readmission data; the first row of the CSV holds the
# column names.
# NOTE(review): missing values appear to be encoded as "?" (e.g. weight,
# payer_code); consider na.strings = "?" if they should be treated as NA.
diabetic_data <- read.csv(
  "~/Desktop/NCU/DissertationDatasets/diabetes+130-us+hospitals+for+years+1999-2008/diabetic_data.csv",
  header = TRUE
)
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only added a stray "NULL" to the output; call it directly.
str(diabetic_data)
## 'data.frame': 101766 obs. of 50 variables:
## $ encounter_id : int 2278392 149190 64410 500364 16680 35754 55842 63768 12522 15738 ...
## $ patient_nbr : int 8222157 55629189 86047875 82442376 42519267 82637451 84259809 114882984 48330783 63555939 ...
## $ race : chr "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
## $ gender : chr "Female" "Female" "Female" "Male" ...
## $ age : chr "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
## $ weight : chr "?" "?" "?" "?" ...
## $ admission_type_id : int 6 1 1 1 1 2 3 1 2 3 ...
## $ discharge_disposition_id: int 25 1 1 1 1 1 1 1 1 3 ...
## $ admission_source_id : int 1 7 7 7 7 2 2 7 4 4 ...
## $ time_in_hospital : int 1 3 2 2 1 3 4 5 13 12 ...
## $ payer_code : chr "?" "?" "?" "?" ...
## $ medical_specialty : chr "Pediatrics-Endocrinology" "?" "?" "?" ...
## $ num_lab_procedures : int 41 59 11 44 51 31 70 73 68 33 ...
## $ num_procedures : int 0 0 5 1 0 6 1 0 2 3 ...
## $ num_medications : int 1 18 13 16 8 16 21 12 28 18 ...
## $ number_outpatient : int 0 0 2 0 0 0 0 0 0 0 ...
## $ number_emergency : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_inpatient : int 0 0 1 0 0 0 0 0 0 0 ...
## $ diag_1 : chr "250.83" "276" "648" "8" ...
## $ diag_2 : chr "?" "250.01" "250" "250.43" ...
## $ diag_3 : chr "?" "255" "V27" "403" ...
## $ number_diagnoses : int 1 9 6 7 5 9 7 8 8 8 ...
## $ max_glu_serum : chr "None" "None" "None" "None" ...
## $ A1Cresult : chr "None" "None" "None" "None" ...
## $ metformin : chr "No" "No" "No" "No" ...
## $ repaglinide : chr "No" "No" "No" "No" ...
## $ nateglinide : chr "No" "No" "No" "No" ...
## $ chlorpropamide : chr "No" "No" "No" "No" ...
## $ glimepiride : chr "No" "No" "No" "No" ...
## $ acetohexamide : chr "No" "No" "No" "No" ...
## $ glipizide : chr "No" "No" "Steady" "No" ...
## $ glyburide : chr "No" "No" "No" "No" ...
## $ tolbutamide : chr "No" "No" "No" "No" ...
## $ pioglitazone : chr "No" "No" "No" "No" ...
## $ rosiglitazone : chr "No" "No" "No" "No" ...
## $ acarbose : chr "No" "No" "No" "No" ...
## $ miglitol : chr "No" "No" "No" "No" ...
## $ troglitazone : chr "No" "No" "No" "No" ...
## $ tolazamide : chr "No" "No" "No" "No" ...
## $ examide : chr "No" "No" "No" "No" ...
## $ citoglipton : chr "No" "No" "No" "No" ...
## $ insulin : chr "No" "Up" "No" "Up" ...
## $ glyburide.metformin : chr "No" "No" "No" "No" ...
## $ glipizide.metformin : chr "No" "No" "No" "No" ...
## $ glimepiride.pioglitazone: chr "No" "No" "No" "No" ...
## $ metformin.rosiglitazone : chr "No" "No" "No" "No" ...
## $ metformin.pioglitazone : chr "No" "No" "No" "No" ...
## $ change : chr "No" "Ch" "No" "Ch" ...
## $ diabetesMed : chr "No" "Yes" "Yes" "Yes" ...
## $ readmitted : chr "NO" ">30" "NO" "NO" ...
## NULL
#Taiwanese Bankruptcy Prediction **
# Load the Taiwanese bankruptcy data (header row present; read.csv() converts
# the original column labels into syntactic names with dots).
data <- read.csv("~/Desktop/NCU/DissertationDatasets/data.csv")
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only added a stray "NULL" to the output; call it directly.
str(data)
## 'data.frame': 6819 obs. of 96 variables:
## $ Bankrupt. : int 1 1 1 1 1 1 0 0 0 0 ...
## $ ROA.C..before.interest.and.depreciation.before.interest: num 0.371 0.464 0.426 0.4 0.465 ...
## $ ROA.A..before.interest.and...after.tax : num 0.424 0.538 0.499 0.451 0.538 ...
## $ ROA.B..before.interest.and.depreciation.after.tax : num 0.406 0.517 0.472 0.458 0.522 ...
## $ Operating.Gross.Margin : num 0.601 0.61 0.601 0.584 0.599 ...
## $ Realized.Sales.Gross.Margin : num 0.601 0.61 0.601 0.584 0.599 ...
## $ Operating.Profit.Rate : num 0.999 0.999 0.999 0.999 0.999 ...
## $ Pre.tax.net.Interest.Rate : num 0.797 0.797 0.796 0.797 0.797 ...
## $ After.tax.net.Interest.Rate : num 0.809 0.809 0.808 0.809 0.809 ...
## $ Non.industry.income.and.expenditure.revenue : num 0.303 0.304 0.302 0.303 0.303 ...
## $ Continuous.interest.rate..after.tax. : num 0.781 0.782 0.78 0.781 0.782 ...
## $ Operating.Expense.Rate : num 1.26e-04 2.90e-04 2.36e-04 1.08e-04 7.89e+09 ...
## $ Research.and.development.expense.rate : num 0.00 0.00 2.55e+07 0.00 0.00 0.00 7.30e+08 5.09e+07 0.00 0.00 ...
## $ Cash.flow.rate : num 0.458 0.462 0.459 0.466 0.463 ...
## $ Interest.bearing.debt.interest.rate : num 0.000725 0.000647 0.00079 0.000449 0.000686 ...
## $ Tax.rate..A. : num 0 0 0 0 0 ...
## $ Net.Value.Per.Share..B. : num 0.148 0.182 0.178 0.154 0.168 ...
## $ Net.Value.Per.Share..A. : num 0.148 0.182 0.178 0.154 0.168 ...
## $ Net.Value.Per.Share..C. : num 0.148 0.182 0.194 0.154 0.168 ...
## $ Persistent.EPS.in.the.Last.Four.Seasons : num 0.169 0.209 0.181 0.194 0.213 ...
## $ Cash.Flow.Per.Share : num 0.312 0.318 0.307 0.322 0.319 ...
## $ Revenue.Per.Share..Yuan... : num 0.01756 0.02114 0.00594 0.01437 0.02969 ...
## $ Operating.Profit.Per.Share..Yuan... : num 0.0959 0.0937 0.0923 0.0778 0.0969 ...
## $ Per.Share.Net.profit.before.tax..Yuan... : num 0.139 0.17 0.143 0.149 0.168 ...
## $ Realized.Sales.Gross.Profit.Growth.Rate : num 0.0221 0.0221 0.0228 0.022 0.0221 ...
## $ Operating.Profit.Growth.Rate : num 0.848 0.848 0.848 0.848 0.848 ...
## $ After.tax.Net.Profit.Growth.Rate : num 0.689 0.69 0.689 0.689 0.69 ...
## $ Regular.Net.Profit.Growth.Rate : num 0.689 0.69 0.689 0.689 0.69 ...
## $ Continuous.Net.Profit.Growth.Rate : num 0.218 0.218 0.218 0.218 0.218 ...
## $ Total.Asset.Growth.Rate : num 4.98e+09 6.11e+09 7.28e+09 4.88e+09 5.51e+09 6.08e+08 5.72e+09 6.63e+09 6.89e+09 5.55e+09 ...
## $ Net.Value.Growth.Rate : num 0.000327 0.000443 0.000396 0.000382 0.000439 ...
## $ Total.Asset.Return.Growth.Rate.Ratio : num 0.263 0.265 0.264 0.263 0.265 ...
## $ Cash.Reinvestment.. : num 0.364 0.377 0.369 0.384 0.38 ...
## $ Current.Ratio : num 0.00226 0.00602 0.01154 0.00419 0.00602 ...
## $ Quick.Ratio : num 0.00121 0.00404 0.00535 0.0029 0.00373 ...
## $ Interest.Expense.Ratio : num 0.63 0.635 0.63 0.63 0.636 ...
## $ Total.debt.Total.net.worth : num 0.02127 0.0125 0.02125 0.00957 0.00515 ...
## $ Debt.ratio.. : num 0.208 0.171 0.208 0.151 0.107 ...
## $ Net.worth.Assets : num 0.792 0.829 0.792 0.849 0.893 ...
## $ Long.term.fund.suitability.ratio..A. : num 0.00502 0.00506 0.0051 0.00505 0.0053 ...
## $ Borrowing.dependency : num 0.39 0.377 0.379 0.38 0.375 ...
## $ Contingent.liabilities.Net.worth : num 0.00648 0.00584 0.00656 0.00537 0.00662 ...
## $ Operating.profit.Paid.in.capital : num 0.0959 0.0937 0.0923 0.0777 0.0969 ...
## $ Net.profit.before.tax.Paid.in.capital : num 0.138 0.169 0.148 0.148 0.167 ...
## $ Inventory.and.accounts.receivable.Net.value : num 0.398 0.398 0.407 0.398 0.4 ...
## $ Total.Asset.Turnover : num 0.087 0.0645 0.015 0.09 0.1754 ...
## $ Accounts.Receivable.Turnover : num 0.00181 0.00129 0.0015 0.00197 0.00145 ...
## $ Average.Collection.Days : num 0.00349 0.00492 0.00423 0.00321 0.00437 ...
## $ Inventory.Turnover.Rate..times. : num 1.82e-04 9.36e+09 6.50e+07 7.13e+09 1.63e-04 ...
## $ Fixed.Assets.Turnover.Frequency : num 1.17e-04 7.19e+08 2.65e+09 9.15e+09 2.94e-04 ...
## $ Net.Worth.Turnover.Rate..times. : num 0.0329 0.0255 0.0134 0.0281 0.0402 ...
## $ Revenue.per.person : num 0.03416 0.00689 0.029 0.01546 0.05811 ...
## $ Operating.profit.per.person : num 0.393 0.392 0.382 0.378 0.394 ...
## $ Allocation.rate.per.person : num 0.0371 0.0123 0.141 0.0213 0.024 ...
## $ Working.Capital.to.Total.Assets : num 0.673 0.751 0.83 0.726 0.752 ...
## $ Quick.Assets.Total.Assets : num 0.167 0.127 0.34 0.162 0.26 ...
## $ Current.Assets.Total.Assets : num 0.191 0.182 0.603 0.226 0.358 ...
## $ Cash.Total.Assets : num 0.004094 0.014948 0.000991 0.018851 0.014161 ...
## $ Quick.Assets.Current.Liability : num 0.002 0.00414 0.0063 0.00296 0.00427 ...
## $ Cash.Current.Liability : num 1.47e-04 1.38e-03 5.34e+09 1.01e-03 6.80e-04 ...
## $ Current.Liability.to.Assets : num 0.1473 0.057 0.0982 0.0987 0.1102 ...
## $ Operating.Funds.to.Liability : num 0.334 0.341 0.337 0.349 0.345 ...
## $ Inventory.Working.Capital : num 0.277 0.29 0.277 0.277 0.288 ...
## $ Inventory.Current.Liability : num 0.00104 0.00521 0.01388 0.00354 0.00487 ...
## $ Current.Liabilities.Liability : num 0.676 0.309 0.446 0.616 0.975 ...
## $ Working.Capital.Equity : num 0.721 0.732 0.743 0.73 0.732 ...
## $ Current.Liabilities.Equity : num 0.339 0.33 0.335 0.332 0.331 ...
## $ Long.term.Liability.to.Current.Assets : num 0.02559 0.02395 0.00372 0.02217 0 ...
## $ Retained.Earnings.to.Total.Assets : num 0.903 0.931 0.91 0.907 0.914 ...
## $ Total.income.Total.expense : num 0.00202 0.00223 0.00206 0.00183 0.00222 ...
## $ Total.expense.Assets : num 0.0649 0.0255 0.0214 0.0242 0.0264 ...
## $ Current.Asset.Turnover.Rate : num 7.01e+08 1.07e-04 1.79e-03 8.14e+09 6.68e+09 ...
## $ Quick.Asset.Turnover.Rate : num 6.55e+09 7.70e+09 1.02e-03 6.05e+09 5.05e+09 ...
## $ Working.capitcal.Turnover.Rate : num 0.594 0.594 0.595 0.594 0.594 ...
## $ Cash.Turnover.Rate : num 4.58e+08 2.49e+09 7.61e+08 2.03e+09 8.24e+08 ...
## $ Cash.Flow.to.Sales : num 0.672 0.672 0.672 0.672 0.672 ...
## $ Fixed.Assets.to.Assets : num 0.424 0.469 0.276 0.559 0.31 ...
## $ Current.Liability.to.Liability : num 0.676 0.309 0.446 0.616 0.975 ...
## $ Current.Liability.to.Equity : num 0.339 0.33 0.335 0.332 0.331 ...
## $ Equity.to.Long.term.Liability : num 0.127 0.121 0.118 0.121 0.111 ...
## $ Cash.Flow.to.Total.Assets : num 0.638 0.641 0.643 0.579 0.622 ...
## $ Cash.Flow.to.Liability : num 0.459 0.459 0.459 0.449 0.454 ...
## $ CFO.to.Assets : num 0.52 0.567 0.538 0.604 0.578 ...
## $ Cash.Flow.to.Equity : num 0.313 0.314 0.315 0.302 0.312 ...
## $ Current.Liability.to.Current.Assets : num 0.1183 0.0478 0.0253 0.0672 0.0477 ...
## $ Liability.Assets.Flag : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Net.Income.to.Total.Assets : num 0.717 0.795 0.775 0.74 0.795 ...
## $ Total.assets.to.GNP.price : num 0.00922 0.00832 0.04 0.00325 0.00388 ...
## $ No.credit.Interval : num 0.623 0.624 0.624 0.623 0.624 ...
## $ Gross.Profit.to.Sales : num 0.601 0.61 0.601 0.584 0.599 ...
## $ Net.Income.to.Stockholder.s.Equity : num 0.828 0.84 0.837 0.835 0.84 ...
## $ Liability.to.Equity : num 0.29 0.284 0.29 0.282 0.279 ...
## $ Degree.of.Financial.Leverage..DFL. : num 0.0266 0.2646 0.0266 0.0267 0.0248 ...
## $ Interest.Coverage.Ratio..Interest.expense.to.EBIT. : num 0.564 0.57 0.564 0.565 0.576 ...
## $ Net.Income.Flag : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Equity.to.Liability : num 0.0165 0.0208 0.0165 0.024 0.0355 ...
## NULL
# Per-column min/quartiles/mean/max for every variable in the bankruptcy data.
summary(data)
## Bankrupt. ROA.C..before.interest.and.depreciation.before.interest
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.4765
## Median :0.00000 Median :0.5027
## Mean :0.03226 Mean :0.5052
## 3rd Qu.:0.00000 3rd Qu.:0.5356
## Max. :1.00000 Max. :1.0000
## ROA.A..before.interest.and...after.tax
## Min. :0.0000
## 1st Qu.:0.5355
## Median :0.5598
## Mean :0.5586
## 3rd Qu.:0.5892
## Max. :1.0000
## ROA.B..before.interest.and.depreciation.after.tax Operating.Gross.Margin
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5273 1st Qu.:0.6004
## Median :0.5523 Median :0.6060
## Mean :0.5536 Mean :0.6079
## 3rd Qu.:0.5841 3rd Qu.:0.6139
## Max. :1.0000 Max. :1.0000
## Realized.Sales.Gross.Margin Operating.Profit.Rate Pre.tax.net.Interest.Rate
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.6004 1st Qu.:0.9990 1st Qu.:0.7974
## Median :0.6060 Median :0.9990 Median :0.7975
## Mean :0.6079 Mean :0.9988 Mean :0.7972
## 3rd Qu.:0.6138 3rd Qu.:0.9991 3rd Qu.:0.7976
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## After.tax.net.Interest.Rate Non.industry.income.and.expenditure.revenue
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8093 1st Qu.:0.3035
## Median :0.8094 Median :0.3035
## Mean :0.8091 Mean :0.3036
## 3rd Qu.:0.8095 3rd Qu.:0.3036
## Max. :1.0000 Max. :1.0000
## Continuous.interest.rate..after.tax. Operating.Expense.Rate
## Min. :0.0000 Min. :0.000e+00
## 1st Qu.:0.7816 1st Qu.:0.000e+00
## Median :0.7816 Median :0.000e+00
## Mean :0.7814 Mean :1.995e+09
## 3rd Qu.:0.7817 3rd Qu.:4.145e+09
## Max. :1.0000 Max. :9.990e+09
## Research.and.development.expense.rate Cash.flow.rate
## Min. :0.00e+00 Min. :0.0000
## 1st Qu.:0.00e+00 1st Qu.:0.4616
## Median :5.09e+08 Median :0.4651
## Mean :1.95e+09 Mean :0.4674
## 3rd Qu.:3.45e+09 3rd Qu.:0.4710
## Max. :9.98e+09 Max. :1.0000
## Interest.bearing.debt.interest.rate Tax.rate..A. Net.Value.Per.Share..B.
## Min. : 0 Min. :0.00000 Min. :0.0000
## 1st Qu.: 0 1st Qu.:0.00000 1st Qu.:0.1736
## Median : 0 Median :0.07349 Median :0.1844
## Mean : 16448013 Mean :0.11500 Mean :0.1907
## 3rd Qu.: 0 3rd Qu.:0.20584 3rd Qu.:0.1996
## Max. :990000000 Max. :1.00000 Max. :1.0000
## Net.Value.Per.Share..A. Net.Value.Per.Share..C.
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1736 1st Qu.:0.1737
## Median :0.1844 Median :0.1844
## Mean :0.1906 Mean :0.1907
## 3rd Qu.:0.1996 3rd Qu.:0.1996
## Max. :1.0000 Max. :1.0000
## Persistent.EPS.in.the.Last.Four.Seasons Cash.Flow.Per.Share
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2147 1st Qu.:0.3177
## Median :0.2245 Median :0.3225
## Mean :0.2288 Mean :0.3235
## 3rd Qu.:0.2388 3rd Qu.:0.3286
## Max. :1.0000 Max. :1.0000
## Revenue.Per.Share..Yuan... Operating.Profit.Per.Share..Yuan...
## Min. :0.000e+00 Min. :0.00000
## 1st Qu.:0.000e+00 1st Qu.:0.09608
## Median :0.000e+00 Median :0.10423
## Mean :1.329e+06 Mean :0.10909
## 3rd Qu.:0.000e+00 3rd Qu.:0.11615
## Max. :3.020e+09 Max. :1.00000
## Per.Share.Net.profit.before.tax..Yuan...
## Min. :0.0000
## 1st Qu.:0.1704
## Median :0.1797
## Mean :0.1844
## 3rd Qu.:0.1935
## Max. :1.0000
## Realized.Sales.Gross.Profit.Growth.Rate Operating.Profit.Growth.Rate
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.02206 1st Qu.:0.8480
## Median :0.02210 Median :0.8480
## Mean :0.02241 Mean :0.8480
## 3rd Qu.:0.02215 3rd Qu.:0.8481
## Max. :1.00000 Max. :1.0000
## After.tax.Net.Profit.Growth.Rate Regular.Net.Profit.Growth.Rate
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.6893 1st Qu.:0.6893
## Median :0.6894 Median :0.6894
## Mean :0.6891 Mean :0.6892
## 3rd Qu.:0.6896 3rd Qu.:0.6896
## Max. :1.0000 Max. :1.0000
## Continuous.Net.Profit.Growth.Rate Total.Asset.Growth.Rate
## Min. :0.0000 Min. :0.000e+00
## 1st Qu.:0.2176 1st Qu.:4.860e+09
## Median :0.2176 Median :6.400e+09
## Mean :0.2176 Mean :5.508e+09
## 3rd Qu.:0.2176 3rd Qu.:7.390e+09
## Max. :1.0000 Max. :9.990e+09
## Net.Value.Growth.Rate Total.Asset.Return.Growth.Rate.Ratio Cash.Reinvestment..
## Min. :0.000e+00 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000e+00 1st Qu.:0.2638 1st Qu.:0.3747
## Median :0.000e+00 Median :0.2640 Median :0.3804
## Mean :1.566e+06 Mean :0.2642 Mean :0.3797
## 3rd Qu.:0.000e+00 3rd Qu.:0.2644 3rd Qu.:0.3867
## Max. :9.330e+09 Max. :1.0000 Max. :1.0000
## Current.Ratio Quick.Ratio Interest.Expense.Ratio
## Min. :0.000e+00 Min. :0.000e+00 Min. :0.0000
## 1st Qu.:0.000e+00 1st Qu.:0.000e+00 1st Qu.:0.6306
## Median :0.000e+00 Median :0.000e+00 Median :0.6307
## Mean :4.033e+05 Mean :8.377e+06 Mean :0.6310
## 3rd Qu.:0.000e+00 3rd Qu.:0.000e+00 3rd Qu.:0.6311
## Max. :2.750e+09 Max. :9.230e+09 Max. :1.0000
## Total.debt.Total.net.worth Debt.ratio.. Net.worth.Assets
## Min. :0.000e+00 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.000e+00 1st Qu.:0.07289 1st Qu.:0.8512
## Median :0.000e+00 Median :0.11141 Median :0.8886
## Mean :4.416e+06 Mean :0.11318 Mean :0.8868
## 3rd Qu.:0.000e+00 3rd Qu.:0.14880 3rd Qu.:0.9271
## Max. :9.940e+09 Max. :1.00000 Max. :1.0000
## Long.term.fund.suitability.ratio..A. Borrowing.dependency
## Min. :0.000000 Min. :0.0000
## 1st Qu.:0.005244 1st Qu.:0.3702
## Median :0.005665 Median :0.3726
## Mean :0.008783 Mean :0.3747
## 3rd Qu.:0.006847 3rd Qu.:0.3763
## Max. :1.000000 Max. :1.0000
## Contingent.liabilities.Net.worth Operating.profit.Paid.in.capital
## Min. :0.000000 Min. :0.0000
## 1st Qu.:0.005366 1st Qu.:0.0961
## Median :0.005366 Median :0.1041
## Mean :0.005968 Mean :0.1090
## 3rd Qu.:0.005764 3rd Qu.:0.1159
## Max. :1.000000 Max. :1.0000
## Net.profit.before.tax.Paid.in.capital
## Min. :0.0000
## 1st Qu.:0.1694
## Median :0.1785
## Mean :0.1827
## 3rd Qu.:0.1916
## Max. :1.0000
## Inventory.and.accounts.receivable.Net.value Total.Asset.Turnover
## Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3974 1st Qu.:0.07646
## Median :0.4001 Median :0.11844
## Mean :0.4025 Mean :0.14161
## 3rd Qu.:0.4046 3rd Qu.:0.17691
## Max. :1.0000 Max. :1.00000
## Accounts.Receivable.Turnover Average.Collection.Days
## Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:0.000e+00 1st Qu.:0.000e+00
## Median :0.000e+00 Median :0.000e+00
## Mean :1.279e+07 Mean :9.826e+06
## 3rd Qu.:0.000e+00 3rd Qu.:0.000e+00
## Max. :9.740e+09 Max. :9.730e+09
## Inventory.Turnover.Rate..times. Fixed.Assets.Turnover.Frequency
## Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:0.000e+00 1st Qu.:0.000e+00
## Median :0.000e+00 Median :0.000e+00
## Mean :2.149e+09 Mean :1.009e+09
## 3rd Qu.:4.620e+09 3rd Qu.:0.000e+00
## Max. :9.990e+09 Max. :9.990e+09
## Net.Worth.Turnover.Rate..times. Revenue.per.person
## Min. :0.00000 Min. :0.000e+00
## 1st Qu.:0.02177 1st Qu.:0.000e+00
## Median :0.02952 Median :0.000e+00
## Mean :0.03860 Mean :2.326e+06
## 3rd Qu.:0.04290 3rd Qu.:0.000e+00
## Max. :1.00000 Max. :8.810e+09
## Operating.profit.per.person Allocation.rate.per.person
## Min. :0.0000 Min. :0.000e+00
## 1st Qu.:0.3924 1st Qu.:0.000e+00
## Median :0.3959 Median :0.000e+00
## Mean :0.4007 Mean :1.126e+07
## 3rd Qu.:0.4019 3rd Qu.:0.000e+00
## Max. :1.0000 Max. :9.570e+09
## Working.Capital.to.Total.Assets Quick.Assets.Total.Assets
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.7743 1st Qu.:0.2420
## Median :0.8103 Median :0.3865
## Mean :0.8141 Mean :0.4001
## 3rd Qu.:0.8504 3rd Qu.:0.5406
## Max. :1.0000 Max. :1.0000
## Current.Assets.Total.Assets Cash.Total.Assets Quick.Assets.Current.Liability
## Min. :0.0000 Min. :0.00000 Min. :0.000e+00
## 1st Qu.:0.3528 1st Qu.:0.03354 1st Qu.:0.000e+00
## Median :0.5148 Median :0.07489 Median :0.000e+00
## Mean :0.5223 Mean :0.12409 Mean :3.593e+06
## 3rd Qu.:0.6891 3rd Qu.:0.16107 3rd Qu.:0.000e+00
## Max. :1.0000 Max. :1.00000 Max. :8.820e+09
## Cash.Current.Liability Current.Liability.to.Assets
## Min. :0.000e+00 Min. :0.00000
## 1st Qu.:0.000e+00 1st Qu.:0.05330
## Median :0.000e+00 Median :0.08270
## Mean :3.716e+07 Mean :0.09067
## 3rd Qu.:0.000e+00 3rd Qu.:0.11952
## Max. :9.650e+09 Max. :1.00000
## Operating.Funds.to.Liability Inventory.Working.Capital
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3410 1st Qu.:0.2770
## Median :0.3486 Median :0.2772
## Mean :0.3538 Mean :0.2774
## 3rd Qu.:0.3609 3rd Qu.:0.2774
## Max. :1.0000 Max. :1.0000
## Inventory.Current.Liability Current.Liabilities.Liability
## Min. :0.000e+00 Min. :0.0000
## 1st Qu.:0.000e+00 1st Qu.:0.6270
## Median :0.000e+00 Median :0.8069
## Mean :5.581e+07 Mean :0.7616
## 3rd Qu.:0.000e+00 3rd Qu.:0.9420
## Max. :9.910e+09 Max. :1.0000
## Working.Capital.Equity Current.Liabilities.Equity
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.7336 1st Qu.:0.3281
## Median :0.7360 Median :0.3297
## Mean :0.7358 Mean :0.3314
## 3rd Qu.:0.7386 3rd Qu.:0.3323
## Max. :1.0000 Max. :1.0000
## Long.term.Liability.to.Current.Assets Retained.Earnings.to.Total.Assets
## Min. :0.000e+00 Min. :0.0000
## 1st Qu.:0.000e+00 1st Qu.:0.9311
## Median :0.000e+00 Median :0.9377
## Mean :5.416e+07 Mean :0.9347
## 3rd Qu.:0.000e+00 3rd Qu.:0.9448
## Max. :9.540e+09 Max. :1.0000
## Total.income.Total.expense Total.expense.Assets Current.Asset.Turnover.Rate
## Min. :0.000000 Min. :0.00000 Min. :0.000e+00
## 1st Qu.:0.002236 1st Qu.:0.01457 1st Qu.:0.000e+00
## Median :0.002336 Median :0.02267 Median :0.000e+00
## Mean :0.002549 Mean :0.02918 Mean :1.196e+09
## 3rd Qu.:0.002492 3rd Qu.:0.03593 3rd Qu.:0.000e+00
## Max. :1.000000 Max. :1.00000 Max. :1.000e+10
## Quick.Asset.Turnover.Rate Working.capitcal.Turnover.Rate Cash.Turnover.Rate
## Min. :0.000e+00 Min. :0.0000 Min. :0.000e+00
## 1st Qu.:0.000e+00 1st Qu.:0.5939 1st Qu.:0.000e+00
## Median :0.000e+00 Median :0.5940 Median :1.080e+09
## Mean :2.164e+09 Mean :0.5940 Mean :2.472e+09
## 3rd Qu.:4.900e+09 3rd Qu.:0.5940 3rd Qu.:4.510e+09
## Max. :1.000e+10 Max. :1.0000 Max. :1.000e+10
## Cash.Flow.to.Sales Fixed.Assets.to.Assets Current.Liability.to.Liability
## Min. :0.0000 Min. :0.00e+00 Min. :0.0000
## 1st Qu.:0.6716 1st Qu.:0.00e+00 1st Qu.:0.6270
## Median :0.6716 Median :0.00e+00 Median :0.8069
## Mean :0.6715 Mean :1.22e+06 Mean :0.7616
## 3rd Qu.:0.6716 3rd Qu.:0.00e+00 3rd Qu.:0.9420
## Max. :1.0000 Max. :8.32e+09 Max. :1.0000
## Current.Liability.to.Equity Equity.to.Long.term.Liability
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3281 1st Qu.:0.1109
## Median :0.3297 Median :0.1123
## Mean :0.3314 Mean :0.1156
## 3rd Qu.:0.3323 3rd Qu.:0.1171
## Max. :1.0000 Max. :1.0000
## Cash.Flow.to.Total.Assets Cash.Flow.to.Liability CFO.to.Assets
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.6333 1st Qu.:0.4571 1st Qu.:0.5660
## Median :0.6454 Median :0.4598 Median :0.5933
## Mean :0.6497 Mean :0.4618 Mean :0.5934
## 3rd Qu.:0.6631 3rd Qu.:0.4642 3rd Qu.:0.6248
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## Cash.Flow.to.Equity Current.Liability.to.Current.Assets Liability.Assets.Flag
## Min. :0.0000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.3130 1st Qu.:0.01803 1st Qu.:0.000000
## Median :0.3150 Median :0.02760 Median :0.000000
## Mean :0.3156 Mean :0.03151 Mean :0.001173
## 3rd Qu.:0.3177 3rd Qu.:0.03837 3rd Qu.:0.000000
## Max. :1.0000 Max. :1.00000 Max. :1.000000
## Net.Income.to.Total.Assets Total.assets.to.GNP.price No.credit.Interval
## Min. :0.0000 Min. :0.000e+00 Min. :0.0000
## 1st Qu.:0.7967 1st Qu.:0.000e+00 1st Qu.:0.6236
## Median :0.8106 Median :0.000e+00 Median :0.6239
## Mean :0.8078 Mean :1.863e+07 Mean :0.6239
## 3rd Qu.:0.8265 3rd Qu.:0.000e+00 3rd Qu.:0.6242
## Max. :1.0000 Max. :9.820e+09 Max. :1.0000
## Gross.Profit.to.Sales Net.Income.to.Stockholder.s.Equity Liability.to.Equity
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.6004 1st Qu.:0.8401 1st Qu.:0.2769
## Median :0.6060 Median :0.8412 Median :0.2788
## Mean :0.6079 Mean :0.8404 Mean :0.2804
## 3rd Qu.:0.6139 3rd Qu.:0.8424 3rd Qu.:0.2814
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## Degree.of.Financial.Leverage..DFL.
## Min. :0.00000
## 1st Qu.:0.02679
## Median :0.02681
## Mean :0.02754
## 3rd Qu.:0.02691
## Max. :1.00000
## Interest.Coverage.Ratio..Interest.expense.to.EBIT. Net.Income.Flag
## Min. :0.0000 Min. :1
## 1st Qu.:0.5652 1st Qu.:1
## Median :0.5653 Median :1
## Mean :0.5654 Mean :1
## 3rd Qu.:0.5657 3rd Qu.:1
## Max. :1.0000 Max. :1
## Equity.to.Liability
## Min. :0.00000
## 1st Qu.:0.02448
## Median :0.03380
## Mean :0.04758
## 3rd Qu.:0.05284
## Max. :1.00000
# Keep the bankruptcy data frame under a descriptive name as well; `data` is a
# generic name that is easy to overwrite later.
taiwanese_data<-data
# Bitcoin Heist Ransomware Address **
# NOTE(review): readr is attached here, but the load below uses base
# read.csv(), not a readr reader.
library(readr)
# NOTE(review): hard-coded, machine-specific path; the file must exist at this
# location for the script to run.
BitcoinHeistData.2 <- read.csv("~/Desktop/NCU/DissertationDatasets/BitcoinHeistData 2.csv")
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only printed a stray "NULL" after the listing.
str(BitcoinHeistData.2)
## 'data.frame': 2916697 obs. of 10 variables:
## $ address : chr "111K8kZAEnJg245r2cM6y9zgJGHZtJPy6" "1123pJv8jzeFQaCV4w644pzQJzVWay2zcA" "112536im7hy6wtKbpH1qYDWtTyMRAcA2p7" "1126eDRw2wqSkWosjTCre8cjjQW8sSeWH7" ...
## $ year : int 2017 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
## $ day : int 11 132 246 322 238 96 225 324 298 62 ...
## $ length : int 18 44 0 72 144 144 142 78 144 112 ...
## $ weight : num 0.008333 0.000244 1 0.003906 0.072848 ...
## $ count : int 1 1 1 1 456 2821 881 1 4220 1 ...
## $ looped : int 0 0 0 0 0 0 0 0 0 0 ...
## $ neighbors: int 2 1 2 2 1 1 2 2 2 1 ...
## $ income : num 1.00e+08 1.00e+08 2.00e+08 7.12e+07 2.00e+08 ...
## $ label : chr "princetonCerber" "princetonLocky" "princetonCerber" "princetonCerber" ...
## NULL
# Print per-column summary statistics for the loaded data frame (quantiles and
# mean for numeric columns; length/class/mode for character columns).
summary(BitcoinHeistData.2)
## address year day length
## Length:2916697 Min. :2011 Min. : 1.0 Min. : 0.00
## Class :character 1st Qu.:2013 1st Qu.: 92.0 1st Qu.: 2.00
## Mode :character Median :2014 Median :181.0 Median : 8.00
## Mean :2014 Mean :181.5 Mean : 45.01
## 3rd Qu.:2016 3rd Qu.:271.0 3rd Qu.:108.00
## Max. :2018 Max. :365.0 Max. :144.00
## weight count looped neighbors
## Min. : 0.0000 Min. : 1.0 Min. : 0.0 Min. : 1.000
## 1st Qu.: 0.0215 1st Qu.: 1.0 1st Qu.: 0.0 1st Qu.: 1.000
## Median : 0.2500 Median : 1.0 Median : 0.0 Median : 2.000
## Mean : 0.5455 Mean : 721.6 Mean : 238.5 Mean : 2.207
## 3rd Qu.: 0.8819 3rd Qu.: 56.0 3rd Qu.: 0.0 3rd Qu.: 2.000
## Max. :1943.7488 Max. :14497.0 Max. :14496.0 Max. :12920.000
## income label
## Min. :3.000e+07 Length:2916697
## 1st Qu.:7.429e+07 Class :character
## Median :2.000e+08 Mode :character
## Mean :4.465e+09
## 3rd Qu.:9.940e+08
## Max. :4.996e+13
# RT-IoT2022 **
# NOTE(review): readr is attached here, but the load below uses base
# read.csv(), not a readr reader.
library(readr)
# NOTE(review): hard-coded, machine-specific path (note: no file extension);
# the file must exist at this location for the script to run.
RT_IOT2022 <- read.csv("~/Downloads/RT_IOT2022")
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only printed a stray "NULL" after the listing.
str(RT_IOT2022)
## 'data.frame': 123117 obs. of 85 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ id.orig_p : int 38667 51143 44761 60893 51087 48579 54063 33457 52181 53469 ...
## $ id.resp_p : int 1883 1883 1883 1883 1883 1883 1883 1883 1883 1883 ...
## $ proto : chr "tcp" "tcp" "tcp" "tcp" ...
## $ service : chr "mqtt" "mqtt" "mqtt" "mqtt" ...
## $ flow_duration : num 32 31.9 32.1 32 31.9 ...
## $ fwd_pkts_tot : int 9 9 9 9 9 9 9 9 9 9 ...
## $ bwd_pkts_tot : int 5 5 5 5 5 5 5 5 5 5 ...
## $ fwd_data_pkts_tot : int 3 3 3 3 3 3 3 3 3 3 ...
## $ bwd_data_pkts_tot : int 3 3 3 3 3 3 3 3 3 3 ...
## $ fwd_pkts_per_sec : num 0.281 0.282 0.28 0.282 0.282 ...
## $ bwd_pkts_per_sec : num 0.156 0.157 0.156 0.156 0.157 ...
## $ flow_pkts_per_sec : num 0.437 0.439 0.436 0.438 0.439 ...
## $ down_up_ratio : num 0.556 0.556 0.556 0.556 0.556 ...
## $ fwd_header_size_tot : int 296 296 296 296 296 296 296 296 296 296 ...
## $ fwd_header_size_min : int 32 32 32 32 32 32 32 32 32 32 ...
## $ fwd_header_size_max : int 40 40 40 40 40 40 40 40 40 40 ...
## $ bwd_header_size_tot : int 168 168 168 168 168 168 168 168 168 168 ...
## $ bwd_header_size_min : int 32 32 32 32 32 32 32 32 32 32 ...
## $ bwd_header_size_max : int 40 40 40 40 40 40 40 40 40 40 ...
## $ flow_FIN_flag_count : int 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_SYN_flag_count : int 2 2 2 2 2 2 2 2 2 2 ...
## $ flow_RST_flag_count : int 1 1 1 1 1 1 1 1 1 1 ...
## $ fwd_PSH_flag_count : int 3 3 3 3 3 3 3 3 3 3 ...
## $ bwd_PSH_flag_count : int 3 3 3 3 3 3 3 3 3 3 ...
## $ flow_ACK_flag_count : int 13 13 13 13 13 13 13 13 13 13 ...
## $ fwd_URG_flag_count : int 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_URG_flag_count : int 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_CWR_flag_count : int 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_ECE_flag_count : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_pkts_payload.min : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_pkts_payload.max : num 33 33 33 33 33 33 33 33 33 33 ...
## $ fwd_pkts_payload.tot : num 76 76 74 74 76 76 76 76 76 76 ...
## $ fwd_pkts_payload.avg : num 8.44 8.44 8.22 8.22 8.44 ...
## $ fwd_pkts_payload.std : num 13.1 13.1 12.9 12.9 13.1 ...
## $ bwd_pkts_payload.min : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_pkts_payload.max : num 23 23 21 21 23 23 23 23 23 23 ...
## $ bwd_pkts_payload.tot : num 32 32 30 30 32 32 32 32 32 32 ...
## $ bwd_pkts_payload.avg : num 6.4 6.4 6 6 6.4 6.4 6.4 6.4 6.4 6.4 ...
## $ bwd_pkts_payload.std : num 9.56 9.56 8.69 8.69 9.56 ...
## $ flow_pkts_payload.min : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_pkts_payload.max : num 33 33 33 33 33 33 33 33 33 33 ...
## $ flow_pkts_payload.tot : num 108 108 104 104 108 108 108 108 108 108 ...
## $ flow_pkts_payload.avg : num 7.71 7.71 7.43 7.43 7.71 ...
## $ flow_pkts_payload.std : num 11.6 11.6 11.2 11.2 11.6 ...
## $ fwd_iat.min : num 762 247 284 289 388 ...
## $ fwd_iat.max : num 29729183 29855277 29842149 29913775 29814705 ...
## $ fwd_iat.tot : num 32011598 31883584 32124053 31961063 31902362 ...
## $ fwd_iat.avg : num 4001450 3985448 4015507 3995133 3987795 ...
## $ fwd_iat.std : num 10403074 10463456 10442378 10482528 10447019 ...
## $ bwd_iat.min : num 4439 4214 2457 3934 3005 ...
## $ bwd_iat.max : num 1511694 1576436 1476049 1551892 1632083 ...
## $ bwd_iat.tot : num 2026391 1876261 2013770 1883784 1935984 ...
## $ bwd_iat.avg : num 506598 469065 503442 470946 483996 ...
## $ bwd_iat.std : num 680406 741352 660344 724569 768543 ...
## $ flow_iat.min : num 762 247 284 289 388 ...
## $ flow_iat.max : num 29729183 29855277 29842149 29913775 29814705 ...
## $ flow_iat.tot : num 32011598 31883584 32124053 31961063 31902362 ...
## $ flow_iat.avg : num 2462431 2452583 2471081 2458543 2454028 ...
## $ flow_iat.std : num 8199747 8242459 8230593 8257786 8230584 ...
## $ payload_bytes_per_second: num 3.37 3.39 3.24 3.25 3.39 ...
## $ fwd_subflow_pkts : num 3 3 3 3 3 3 3 3 3 3 ...
## $ bwd_subflow_pkts : num 1.67 1.67 1.67 1.67 1.67 ...
## $ fwd_subflow_bytes : num 25.3 25.3 24.7 24.7 25.3 ...
## $ bwd_subflow_bytes : num 10.7 10.7 10 10 10.7 ...
## $ fwd_bulk_bytes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_bytes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_bulk_packets : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_packets : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_bulk_rate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_rate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ active.min : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.max : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.tot : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.avg : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.std : num 0 0 0 0 0 0 0 0 0 0 ...
## $ idle.min : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.max : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.tot : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.avg : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.std : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_init_window_size : int 64240 64240 64240 64240 64240 64240 64240 64240 64240 64240 ...
## $ bwd_init_window_size : int 26847 26847 26847 26847 26847 26847 26847 26847 26847 26847 ...
## $ fwd_last_window_size : int 502 502 502 502 502 502 502 502 502 502 ...
## $ Attack_type : chr "MQTT_Publish" "MQTT_Publish" "MQTT_Publish" "MQTT_Publish" ...
## NULL
# Print per-column summary statistics for the loaded data frame (quantiles and
# mean for numeric columns; length/class/mode for character columns).
summary(RT_IOT2022)
## X id.orig_p id.resp_p proto
## Min. : 0 Min. : 0 Min. : 0 Length:123117
## 1st Qu.: 6059 1st Qu.:17702 1st Qu.: 21 Class :character
## Median :33100 Median :37221 Median : 21 Mode :character
## Mean :37035 Mean :34639 Mean : 1014
## 3rd Qu.:63879 3rd Qu.:50971 3rd Qu.: 21
## Max. :94658 Max. :65535 Max. :65389
## service flow_duration fwd_pkts_tot bwd_pkts_tot
## Length:123117 Min. : 0.00 Min. : 0.000 Min. : 0.00
## Class :character 1st Qu.: 0.00 1st Qu.: 1.000 1st Qu.: 1.00
## Mode :character Median : 0.00 Median : 1.000 Median : 1.00
## Mean : 3.81 Mean : 2.269 Mean : 1.91
## 3rd Qu.: 0.00 3rd Qu.: 1.000 3rd Qu.: 1.00
## Max. :21728.34 Max. :4345.000 Max. :10112.00
## fwd_data_pkts_tot bwd_data_pkts_tot fwd_pkts_per_sec bwd_pkts_per_sec
## Min. : 0.000 Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 1.000 1st Qu.: 0.00 1st Qu.: 74.5 1st Qu.: 72.9
## Median : 1.000 Median : 0.00 Median : 246723.8 Median : 246723.8
## Mean : 1.471 Mean : 0.82 Mean : 351806.3 Mean : 351762.0
## 3rd Qu.: 1.000 3rd Qu.: 0.00 3rd Qu.: 524288.0 3rd Qu.: 524288.0
## Max. :4345.000 Max. :10105.00 Max. :1048576.0 Max. :1048576.0
## flow_pkts_per_sec down_up_ratio fwd_header_size_tot fwd_header_size_min
## Min. : 0.0 Min. :0.0000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 149.1 1st Qu.:1.0000 1st Qu.: 20.00 1st Qu.:20.00
## Median : 493447.5 Median :1.0000 Median : 20.00 Median :20.00
## Mean : 703568.3 Mean :0.8546 Mean : 53.89 Mean :19.78
## 3rd Qu.:1048576.0 3rd Qu.:1.0000 3rd Qu.: 20.00 3rd Qu.:20.00
## Max. :2097152.0 Max. :6.0879 Max. :69296.00 Max. :44.00
## fwd_header_size_max bwd_header_size_tot bwd_header_size_min
## Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.:20.00 1st Qu.: 20.0 1st Qu.:20.0
## Median :20.00 Median : 20.0 Median :20.0
## Mean :20.65 Mean : 46.6 Mean :17.7
## 3rd Qu.:20.00 3rd Qu.: 20.0 3rd Qu.:20.0
## Max. :52.00 Max. :323592.0 Max. :40.0
## bwd_header_size_max flow_FIN_flag_count flow_SYN_flag_count
## Min. : 0.00 Min. : 0.0000 Min. :0.0000
## 1st Qu.:20.00 1st Qu.: 0.0000 1st Qu.:1.0000
## Median :20.00 Median : 0.0000 Median :1.0000
## Mean :18.43 Mean : 0.1156 Mean :0.9509
## 3rd Qu.:20.00 3rd Qu.: 0.0000 3rd Qu.:1.0000
## Max. :44.00 Max. :10.0000 Max. :8.0000
## flow_RST_flag_count fwd_PSH_flag_count bwd_PSH_flag_count flow_ACK_flag_count
## Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 0.000
## 1st Qu.: 1.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 1.000
## Median : 1.0000 Median : 0.0000 Median : 0.0000 Median : 1.000
## Mean : 0.7965 Mean : 0.3513 Mean : 0.3936 Mean : 2.678
## 3rd Qu.: 1.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.000
## Max. :10.0000 Max. :864.0000 Max. :1446.0000 Max. :11772.000
## fwd_URG_flag_count bwd_URG_flag_count flow_CWR_flag_count flow_ECE_flag_count
## Min. :0.00000 Min. :0 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :0 Median :0.000000 Median :0.000000
## Mean :0.01629 Mean :0 Mean :0.001007 Mean :0.000699
## 3rd Qu.:0.00000 3rd Qu.:0 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.00000 Max. :0 Max. :4.000000 Max. :4.000000
## fwd_pkts_payload.min fwd_pkts_payload.max fwd_pkts_payload.tot
## Min. : 0.00 Min. : 0.0 Min. : 0.0
## 1st Qu.: 120.00 1st Qu.: 120.0 1st Qu.: 120.0
## Median : 120.00 Median : 120.0 Median : 120.0
## Mean : 96.26 Mean : 120.7 Mean : 221.5
## 3rd Qu.: 120.00 3rd Qu.: 120.0 3rd Qu.: 120.0
## Max. :1097.00 Max. :1420.0 Max. :747340.0
## fwd_pkts_payload.avg fwd_pkts_payload.std bwd_pkts_payload.min
## Min. : 0.0 Min. : 0.000 Min. : 0.000
## 1st Qu.: 120.0 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 120.0 Median : 0.000 Median : 0.000
## Mean : 100.5 Mean : 8.108 Mean : 3.817
## 3rd Qu.: 120.0 3rd Qu.: 0.000 3rd Qu.: 0.000
## Max. :1319.4 Max. :731.579 Max. :1357.000
## bwd_pkts_payload.max bwd_pkts_payload.tot bwd_pkts_payload.avg
## Min. : 0.0 Min. : 0 Min. : 0.00
## 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0.00
## Median : 0.0 Median : 0 Median : 0.00
## Mean : 52.4 Mean : 513 Mean : 18.79
## 3rd Qu.: 0.0 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :5124.0 Max. :13610415 Max. :1457.05
## bwd_pkts_payload.std flow_pkts_payload.min flow_pkts_payload.max
## Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 120.0
## Median : 0.00 Median : 0.00 Median : 120.0
## Mean : 20.55 Mean : 13.55 Mean : 148.5
## 3rd Qu.: 0.00 3rd Qu.: 0.00 3rd Qu.: 120.0
## Max. :1506.01 Max. :1097.00 Max. :5124.0
## flow_pkts_payload.tot flow_pkts_payload.avg flow_pkts_payload.std
## Min. : 0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 120 1st Qu.: 60.00 1st Qu.: 50.22
## Median : 120 Median : 60.00 Median : 84.85
## Mean : 735 Mean : 65.01 Mean : 76.04
## 3rd Qu.: 120 3rd Qu.: 60.00 3rd Qu.: 84.85
## Max. :13610585 Max. :1156.08 Max. :924.65
## fwd_iat.min fwd_iat.max fwd_iat.tot
## Min. : 0 Min. : 0 Min. :0.000e+00
## 1st Qu.: 0 1st Qu.: 0 1st Qu.:0.000e+00
## Median : 0 Median : 0 Median :0.000e+00
## Mean : 8843 Mean : 1721566 Mean :3.780e+06
## 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.:0.000e+00
## Max. :300252571 Max. :300252571 Max. :2.173e+10
## fwd_iat.avg fwd_iat.std bwd_iat.min bwd_iat.max
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0
## Median : 0 Median : 0 Median : 0 Median : 0
## Mean : 237357 Mean : 577557 Mean : 3765 Mean : 407727
## 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 0
## Max. :300252571 Max. :212296532 Max. :43196220 Max. :300028179
## bwd_iat.tot bwd_iat.avg bwd_iat.std flow_iat.min
## Min. :0.000e+00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:0.000e+00 1st Qu.: 0 1st Qu.: 0 1st Qu.: 1
## Median :0.000e+00 Median : 0 Median : 0 Median : 4
## Mean :1.780e+06 Mean : 87652 Mean : 147480 Mean : 4283
## 3rd Qu.:0.000e+00 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 5
## Max. :1.876e+10 Max. :150148934 Max. :211961260 Max. :43510042
## flow_iat.max flow_iat.tot flow_iat.avg flow_iat.std
## Min. : 0 Min. :0.000e+00 Min. : 0 Min. : 0
## 1st Qu.: 1 1st Qu.:1.000e+00 1st Qu.: 1 1st Qu.: 0
## Median : 4 Median :4.000e+00 Median : 4 Median : 0
## Mean : 1725999 Mean :3.811e+06 Mean : 139654 Mean : 450136
## 3rd Qu.: 5 3rd Qu.:5.000e+00 3rd Qu.: 5 3rd Qu.: 0
## Max. :299999988 Max. :2.173e+10 Max. :72835758 Max. :134122073
## payload_bytes_per_second fwd_subflow_pkts bwd_subflow_pkts
## Min. : 0 Min. : 0.000 Min. : 0.000
## 1st Qu.: 2581 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 29606852 Median : 1.000 Median : 1.000
## Mean : 41053452 Mean : 1.552 Mean : 1.338
## 3rd Qu.: 55924053 3rd Qu.: 1.000 3rd Qu.: 1.000
## Max. :125829120 Max. :276.833 Max. :1685.333
## fwd_subflow_bytes bwd_subflow_bytes fwd_bulk_bytes bwd_bulk_bytes
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 120.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0
## Median : 120.0 Median : 0.0 Median : 0.0 Median : 0
## Mean : 136.5 Mean : 217.5 Mean : 19.2 Mean : 155
## 3rd Qu.: 120.0 3rd Qu.: 0.0 3rd Qu.: 0.0 3rd Qu.: 0
## Max. :52067.8 Max. :2268402.5 Max. :465095.0 Max. :6805208
## fwd_bulk_packets bwd_bulk_packets fwd_bulk_rate bwd_bulk_rate
## Min. : 0.0000 Min. : 0.000 Min. : 0 Min. : 0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0 1st Qu.: 0
## Median : 0.0000 Median : 0.000 Median : 0 Median : 0
## Mean : 0.0241 Mean : 0.131 Mean : 3836 Mean : 48415
## 3rd Qu.: 0.0000 3rd Qu.: 0.000 3rd Qu.: 0 3rd Qu.: 0
## Max. :343.0000 Max. :5052.500 Max. :46336283 Max. :28300874
## active.min active.max active.tot
## Min. : 0 Min. : 0 Min. :0.000e+00
## 1st Qu.: 1 1st Qu.: 1 1st Qu.:1.000e+00
## Median : 4 Median : 4 Median :4.000e+00
## Mean : 133155 Mean : 178590 Mean :2.929e+05
## 3rd Qu.: 5 3rd Qu.: 5 3rd Qu.:5.000e+00
## Max. :312507974 Max. :848097909 Max. :2.945e+09
## active.avg active.std idle.min
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 1 1st Qu.: 0 1st Qu.: 0
## Median : 4 Median : 0 Median : 0
## Mean : 148135 Mean : 23536 Mean : 1616655
## 3rd Qu.: 5 3rd Qu.: 0 3rd Qu.: 0
## Max. :437493062 Max. :477486236 Max. :299999988
## idle.max idle.tot idle.avg
## Min. : 0 Min. :0.000e+00 Min. : 0
## 1st Qu.: 0 1st Qu.:0.000e+00 1st Qu.: 0
## Median : 0 Median :0.000e+00 Median : 0
## Mean : 1701956 Mean :3.518e+06 Mean : 1664985
## 3rd Qu.: 0 3rd Qu.:0.000e+00 3rd Qu.: 0
## Max. :299999988 Max. :2.097e+10 Max. :299999988
## idle.std fwd_init_window_size bwd_init_window_size
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 64 1st Qu.: 0
## Median : 0 Median : 64 Median : 0
## Mean : 45502 Mean : 6119 Mean : 2740
## 3rd Qu.: 0 3rd Qu.: 64 3rd Qu.: 0
## Max. :120802871 Max. :65535 Max. :65535
## fwd_last_window_size Attack_type
## Min. : 0.0 Length:123117
## 1st Qu.: 64.0 Class :character
## Median : 64.0 Mode :character
## Mean : 751.6
## 3rd Qu.: 64.0
## Max. :65535.0
# PhiUSIIL_Phishing_URL_Dataset **
# NOTE(review): readr is attached here, but the load below uses base
# read.csv(), not a readr reader.
library(readr)
# NOTE(review): hard-coded, machine-specific path; the file must exist at this
# location for the script to run.
PhiUSIIL_Phishing_URL_Dataset <- read.csv("~/Desktop/NCU/DissertationDatasets/PhiUSIIL_Phishing_URL_Dataset.csv")
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only printed a stray "NULL" after the listing.
str(PhiUSIIL_Phishing_URL_Dataset)
## 'data.frame': 235795 obs. of 56 variables:
## $ FILENAME : chr "521848.txt" "31372.txt" "597387.txt" "554095.txt" ...
## $ URL : chr "https://www.southbankmosaics.com" "https://www.uni-mainz.de" "https://www.voicefmradio.co.uk" "https://www.sfnmjournal.com" ...
## $ URLLength : int 31 23 29 26 33 30 25 25 29 18 ...
## $ Domain : chr "www.southbankmosaics.com" "www.uni-mainz.de" "www.voicefmradio.co.uk" "www.sfnmjournal.com" ...
## $ DomainLength : int 24 16 22 19 26 23 18 18 22 11 ...
## $ IsDomainIP : int 0 0 0 0 0 0 0 0 0 0 ...
## $ TLD : chr "com" "de" "uk" "com" ...
## $ URLSimilarityIndex : num 100 100 100 100 100 100 100 100 100 100 ...
## $ CharContinuationRate : num 1 0.667 0.867 1 1 ...
## $ TLDLegitimateProb : num 0.5229 0.0327 0.0286 0.5229 0.08 ...
## $ URLCharProb : num 0.0619 0.0502 0.0641 0.0576 0.0594 ...
## $ TLDLength : int 3 2 2 3 3 3 3 3 2 3 ...
## $ NoOfSubDomain : int 1 1 2 1 1 1 1 1 1 1 ...
## $ HasObfuscation : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfObfuscatedChar : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ObfuscationRatio : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfLettersInURL : int 18 9 15 13 20 17 12 12 16 5 ...
## $ LetterRatioInURL : num 0.581 0.391 0.517 0.5 0.606 0.567 0.48 0.48 0.552 0.278 ...
## $ NoOfDegitsInURL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DegitRatioInURL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfEqualsInURL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfQMarkInURL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfAmpersandInURL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfOtherSpecialCharsInURL: int 1 2 2 1 1 1 1 1 1 1 ...
## $ SpacialCharRatioInURL : num 0.032 0.087 0.069 0.038 0.03 0.033 0.04 0.04 0.034 0.056 ...
## $ IsHTTPS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LineOfCode : int 558 618 467 6356 6089 1210 1024 514 2371 2730 ...
## $ LargestLineLength : int 9381 9381 682 26824 28404 737 984 399 12913 481 ...
## $ HasTitle : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Title : chr "ข่าวสด ข่าววันนี้ ข่าวà¸\u0081ีฬา ข่าวบันเทิà"| __truncated__ "johannes gutenberg-universität mainz" "voice fm southampton" "home page: seminars in fetal and neonatal medicine " ...
## $ DomainTitleMatchScore : num 0 55.6 46.7 0 100 ...
## $ URLTitleMatchScore : num 0 55.6 46.7 0 100 ...
## $ HasFavicon : int 0 1 0 0 0 0 1 1 0 1 ...
## $ Robots : int 1 1 1 1 1 0 0 0 1 1 ...
## $ IsResponsive : int 1 0 1 1 1 1 1 1 1 1 ...
## $ NoOfURLRedirect : int 0 0 0 0 1 0 1 0 0 0 ...
## $ NoOfSelfRedirect : int 0 0 0 0 1 0 1 0 0 0 ...
## $ HasDescription : int 0 0 1 0 1 1 0 1 1 1 ...
## $ NoOfPopup : int 0 0 0 1 0 1 2 0 0 0 ...
## $ NoOfiFrame : int 1 0 0 12 2 1 4 1 0 2 ...
## $ HasExternalFormSubmit : int 0 0 0 0 0 0 0 0 0 0 ...
## $ HasSocialNet : int 0 1 0 1 1 1 1 1 1 1 ...
## $ HasSubmitButton : int 1 1 1 1 1 0 0 1 1 0 ...
## $ HasHiddenFields : int 1 0 1 1 1 1 1 0 1 0 ...
## $ HasPasswordField : int 0 0 0 0 0 0 0 0 1 0 ...
## $ Bank : int 1 0 0 0 1 0 0 0 0 0 ...
## $ Pay : int 0 0 0 1 1 0 0 0 0 0 ...
## $ Crypto : int 0 0 0 1 0 0 0 0 0 0 ...
## $ HasCopyrightInfo : int 1 1 1 1 1 1 1 1 1 1 ...
## $ NoOfImage : int 34 50 10 3 244 35 32 24 71 10 ...
## $ NoOfCSS : int 20 9 2 27 15 1 4 2 4 1 ...
## $ NoOfJS : int 28 8 7 15 34 11 14 22 9 12 ...
## $ NoOfSelfRef : int 119 39 42 22 72 86 44 36 40 173 ...
## $ NoOfEmptyRef : int 0 0 2 1 1 0 2 0 1 6 ...
## $ NoOfExternalRef : int 124 217 5 31 85 14 17 15 317 65 ...
## $ label : int 1 1 1 1 1 1 1 1 1 1 ...
## NULL
# Print per-column summary statistics for the loaded data frame (quantiles and
# mean for numeric columns; length/class/mode for character columns).
summary(PhiUSIIL_Phishing_URL_Dataset)
## FILENAME URL URLLength Domain
## Length:235795 Length:235795 Min. : 13.00 Length:235795
## Class :character Class :character 1st Qu.: 23.00 Class :character
## Mode :character Mode :character Median : 27.00 Mode :character
## Mean : 34.57
## 3rd Qu.: 34.00
## Max. :6097.00
## DomainLength IsDomainIP TLD URLSimilarityIndex
## Min. : 4.00 Min. :0.000000 Length:235795 Min. : 0.1556
## 1st Qu.: 16.00 1st Qu.:0.000000 Class :character 1st Qu.: 57.0248
## Median : 20.00 Median :0.000000 Mode :character Median :100.0000
## Mean : 21.47 Mean :0.002706 Mean : 78.4308
## 3rd Qu.: 24.00 3rd Qu.:0.000000 3rd Qu.:100.0000
## Max. :110.00 Max. :1.000000 Max. :100.0000
## CharContinuationRate TLDLegitimateProb URLCharProb TLDLength
## Min. :0.0000 Min. :0.000000 Min. :0.001083 Min. : 2.000
## 1st Qu.:0.6800 1st Qu.:0.005977 1st Qu.:0.050747 1st Qu.: 2.000
## Median :1.0000 Median :0.079963 Median :0.057970 Median : 3.000
## Mean :0.8455 Mean :0.260423 Mean :0.055747 Mean : 2.764
## 3rd Qu.:1.0000 3rd Qu.:0.522907 3rd Qu.:0.062875 3rd Qu.: 3.000
## Max. :1.0000 Max. :0.522907 Max. :0.090824 Max. :13.000
## NoOfSubDomain HasObfuscation NoOfObfuscatedChar ObfuscationRatio
## Min. : 0.000 Min. :0.000000 Min. : 0.0000 Min. :0.0000000
## 1st Qu.: 1.000 1st Qu.:0.000000 1st Qu.: 0.0000 1st Qu.:0.0000000
## Median : 1.000 Median :0.000000 Median : 0.0000 Median :0.0000000
## Mean : 1.165 Mean :0.002057 Mean : 0.0249 Mean :0.0001384
## 3rd Qu.: 1.000 3rd Qu.:0.000000 3rd Qu.: 0.0000 3rd Qu.:0.0000000
## Max. :10.000 Max. :1.000000 Max. :447.0000 Max. :0.3480000
## NoOfLettersInURL LetterRatioInURL NoOfDegitsInURL DegitRatioInURL
## Min. : 0.00 Min. :0.0000 Min. : 0.000 Min. :0.00000
## 1st Qu.: 10.00 1st Qu.:0.4350 1st Qu.: 0.000 1st Qu.:0.00000
## Median : 14.00 Median :0.5190 Median : 0.000 Median :0.00000
## Mean : 19.43 Mean :0.5159 Mean : 1.881 Mean :0.02862
## 3rd Qu.: 20.00 3rd Qu.:0.5940 3rd Qu.: 0.000 3rd Qu.:0.00000
## Max. :5191.00 Max. :0.9260 Max. :2011.000 Max. :0.68400
## NoOfEqualsInURL NoOfQMarkInURL NoOfAmpersandInURL
## Min. : 0.00000 Min. :0.0000 Min. : 0.00000
## 1st Qu.: 0.00000 1st Qu.:0.0000 1st Qu.: 0.00000
## Median : 0.00000 Median :0.0000 Median : 0.00000
## Mean : 0.06224 Mean :0.0294 Mean : 0.02506
## 3rd Qu.: 0.00000 3rd Qu.:0.0000 3rd Qu.: 0.00000
## Max. :176.00000 Max. :4.0000 Max. :149.00000
## NoOfOtherSpecialCharsInURL SpacialCharRatioInURL IsHTTPS
## Min. : 0.00 Min. :0.00000 Min. :0.0000
## 1st Qu.: 1.00 1st Qu.:0.03800 1st Qu.:1.0000
## Median : 1.00 Median :0.05000 Median :1.0000
## Mean : 2.34 Mean :0.06331 Mean :0.7826
## 3rd Qu.: 3.00 3rd Qu.:0.08300 3rd Qu.:1.0000
## Max. :499.00 Max. :0.39700 Max. :1.0000
## LineOfCode LargestLineLength HasTitle Title
## Min. : 2 Min. : 22 Min. :0.0000 Length:235795
## 1st Qu.: 18 1st Qu.: 200 1st Qu.:1.0000 Class :character
## Median : 429 Median : 1090 Median :1.0000 Mode :character
## Mean : 1142 Mean : 12790 Mean :0.8613
## 3rd Qu.: 1277 3rd Qu.: 8047 3rd Qu.:1.0000
## Max. :442666 Max. :13975732 Max. :1.0000
## DomainTitleMatchScore URLTitleMatchScore HasFavicon Robots
## Min. : 0.00 Min. : 0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 75.00 Median :100.00 Median :0.0000 Median :0.0000
## Mean : 50.13 Mean : 52.12 Mean :0.3618 Mean :0.2665
## 3rd Qu.:100.00 3rd Qu.:100.00 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :100.00 Max. :100.00 Max. :1.0000 Max. :1.0000
## IsResponsive NoOfURLRedirect NoOfSelfRedirect HasDescription
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.6245 Mean :0.1334 Mean :0.04011 Mean :0.4402
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## NoOfPopup NoOfiFrame HasExternalFormSubmit HasSocialNet
## Min. : 0.0000 Min. : 0.000 Min. :0.00000 Min. :0.0000
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.:0.00000 1st Qu.:0.0000
## Median : 0.0000 Median : 0.000 Median :0.00000 Median :0.0000
## Mean : 0.2218 Mean : 1.589 Mean :0.04399 Mean :0.4566
## 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :602.0000 Max. :1602.000 Max. :1.00000 Max. :1.0000
## HasSubmitButton HasHiddenFields HasPasswordField Bank
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4143 Mean :0.3778 Mean :0.1023 Mean :0.1271
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Pay Crypto HasCopyrightInfo NoOfImage
## Min. :0.000 Min. :0.00000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.: 0.00
## Median :0.000 Median :0.00000 Median :0.0000 Median : 8.00
## Mean :0.237 Mean :0.02347 Mean :0.4868 Mean : 26.08
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.: 29.00
## Max. :1.000 Max. :1.00000 Max. :1.0000 Max. :8956.00
## NoOfCSS NoOfJS NoOfSelfRef NoOfEmptyRef
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 2.00 Median : 6.00 Median : 12.00 Median : 0.000
## Mean : 6.33 Mean : 10.52 Mean : 65.07 Mean : 2.378
## 3rd Qu.: 8.00 3rd Qu.: 15.00 3rd Qu.: 88.00 3rd Qu.: 1.000
## Max. :35820.00 Max. :6957.00 Max. :27397.00 Max. :4887.000
## NoOfExternalRef label
## Min. : 0.00 Min. :0.0000
## 1st Qu.: 1.00 1st Qu.:0.0000
## Median : 10.00 Median :1.0000
## Mean : 49.26 Mean :0.5719
## 3rd Qu.: 57.00 3rd Qu.:1.0000
## Max. :27516.00 Max. :1.0000
# in_vehicle_coupon_recommendation **
# NOTE(review): readr is attached here, but the load below uses base
# read.csv(), not a readr reader.
library(readr)
# NOTE(review): hard-coded, machine-specific path; the file must exist at this
# location for the script to run. Missing entries in this file come through as
# empty strings (see the `car` and `CarryAway` columns in the listing), not NA.
in.vehicle.coupon.recommendation <- read.csv("~/Desktop/NCU/DissertationDatasets/in-vehicle-coupon-recommendation.csv", header = TRUE)
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only printed a stray "NULL" after the listing.
str(in.vehicle.coupon.recommendation)
## 'data.frame': 12684 obs. of 26 variables:
## $ destination : chr "No Urgent Place" "No Urgent Place" "No Urgent Place" "No Urgent Place" ...
## $ passanger : chr "Alone" "Friend(s)" "Friend(s)" "Friend(s)" ...
## $ weather : chr "Sunny" "Sunny" "Sunny" "Sunny" ...
## $ temperature : int 55 80 80 80 80 80 55 80 80 80 ...
## $ time : chr "2PM" "10AM" "10AM" "2PM" ...
## $ coupon : chr "Restaurant(<20)" "Coffee House" "Carry out & Take away" "Coffee House" ...
## $ expiration : chr "1d" "2h" "2h" "2h" ...
## $ gender : chr "Female" "Female" "Female" "Female" ...
## $ age : chr "21" "21" "21" "21" ...
## $ maritalStatus : chr "Unmarried partner" "Unmarried partner" "Unmarried partner" "Unmarried partner" ...
## $ has_children : int 1 1 1 1 1 1 1 1 1 1 ...
## $ education : chr "Some college - no degree" "Some college - no degree" "Some college - no degree" "Some college - no degree" ...
## $ occupation : chr "Unemployed" "Unemployed" "Unemployed" "Unemployed" ...
## $ income : chr "$37500 - $49999" "$37500 - $49999" "$37500 - $49999" "$37500 - $49999" ...
## $ car : chr "" "" "" "" ...
## $ Bar : chr "never" "never" "never" "never" ...
## $ CoffeeHouse : chr "never" "never" "never" "never" ...
## $ CarryAway : chr "" "" "" "" ...
## $ RestaurantLessThan20: chr "4~8" "4~8" "4~8" "4~8" ...
## $ Restaurant20To50 : chr "1~3" "1~3" "1~3" "1~3" ...
## $ toCoupon_GEQ5min : int 1 1 1 1 1 1 1 1 1 1 ...
## $ toCoupon_GEQ15min : int 0 0 1 1 1 1 1 1 1 1 ...
## $ toCoupon_GEQ25min : int 0 0 0 0 0 0 0 0 0 0 ...
## $ direction_same : int 0 0 0 0 0 0 0 0 0 0 ...
## $ direction_opp : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Y : int 1 0 1 0 0 1 1 1 1 0 ...
## NULL
# Print per-column summary statistics for the loaded data frame (quantiles and
# mean for numeric columns; length/class/mode for character columns).
summary(in.vehicle.coupon.recommendation)
## destination passanger weather temperature
## Length:12684 Length:12684 Length:12684 Min. :30.0
## Class :character Class :character Class :character 1st Qu.:55.0
## Mode :character Mode :character Mode :character Median :80.0
## Mean :63.3
## 3rd Qu.:80.0
## Max. :80.0
## time coupon expiration gender
## Length:12684 Length:12684 Length:12684 Length:12684
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## age maritalStatus has_children education
## Length:12684 Length:12684 Min. :0.0000 Length:12684
## Class :character Class :character 1st Qu.:0.0000 Class :character
## Mode :character Mode :character Median :0.0000 Mode :character
## Mean :0.4141
## 3rd Qu.:1.0000
## Max. :1.0000
## occupation income car Bar
## Length:12684 Length:12684 Length:12684 Length:12684
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## CoffeeHouse CarryAway RestaurantLessThan20 Restaurant20To50
## Length:12684 Length:12684 Length:12684 Length:12684
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## toCoupon_GEQ5min toCoupon_GEQ15min toCoupon_GEQ25min direction_same
## Min. :1 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :1 Mean :0.5615 Mean :0.1191 Mean :0.2148
## 3rd Qu.:1 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1 Max. :1.0000 Max. :1.0000 Max. :1.0000
## direction_opp Y
## Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000
## Mean :0.7852 Mean :0.5684
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
# HTRU_2
# The section header and the library() call had been fused onto one comment
# line, which silently commented the call out; they are separate again here.
# NOTE(review): the load below uses base read.csv(), not a readr reader.
library(readr)
# The file has no header row, so columns are auto-named V1..V9.
# NOTE(review): hard-coded, machine-specific path; the file must exist at this
# location for the script to run.
HTRU_2 <- read.csv("~/Downloads/htru2/HTRU_2.csv", header = FALSE)
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only printed a stray "NULL" after the listing.
str(HTRU_2)
## 'data.frame': 17898 obs. of 9 variables:
## $ V1: num 140.6 102.5 103 136.8 88.7 ...
## $ V2: num 55.7 58.9 39.3 57.2 40.7 ...
## $ V3: num -0.2346 0.4653 0.3233 -0.0684 0.6009 ...
## $ V4: num -0.7 -0.515 1.051 -0.636 1.123 ...
## $ V5: num 3.2 1.68 3.12 3.64 1.18 ...
## $ V6: num 19.1 14.9 21.7 21 11.5 ...
## $ V7: num 7.98 10.58 7.74 6.9 14.27 ...
## $ V8: num 74.2 127.4 63.2 53.6 252.6 ...
## $ V9: int 0 0 0 0 0 0 0 0 0 0 ...
## NULL
# Print per-column summary statistics (min, quartiles, mean, max) for the
# loaded data frame.
summary(HTRU_2)
## V1 V2 V3 V4
## Min. : 5.812 Min. :24.77 Min. :-1.8760 Min. :-1.7919
## 1st Qu.:100.930 1st Qu.:42.38 1st Qu.: 0.0271 1st Qu.:-0.1886
## Median :115.078 Median :46.95 Median : 0.2232 Median : 0.1987
## Mean :111.080 Mean :46.55 Mean : 0.4779 Mean : 1.7703
## 3rd Qu.:127.086 3rd Qu.:51.02 3rd Qu.: 0.4733 3rd Qu.: 0.9278
## Max. :192.617 Max. :98.78 Max. : 8.0695 Max. :68.1016
## V5 V6 V7 V8
## Min. : 0.2132 Min. : 7.37 Min. :-3.139 Min. : -1.977
## 1st Qu.: 1.9231 1st Qu.: 14.44 1st Qu.: 5.782 1st Qu.: 34.961
## Median : 2.8018 Median : 18.46 Median : 8.434 Median : 83.065
## Mean : 12.6144 Mean : 26.33 Mean : 8.304 Mean : 104.858
## 3rd Qu.: 5.4643 3rd Qu.: 28.43 3rd Qu.:10.703 3rd Qu.: 139.309
## Max. :223.3921 Max. :110.64 Max. :34.540 Max. :1191.001
## V9
## Min. :0.00000
## 1st Qu.:0.00000
## Median :0.00000
## Mean :0.09157
## 3rd Qu.:0.00000
## Max. :1.00000
# Internet Firewall Dataset
# NOTE(review): the data frame is named `log2`, which shadows base::log2 for
# non-call uses; function calls such as log2(x) still resolve to the base
# function.
# NOTE(review): hard-coded, machine-specific path; the file must exist at this
# location for the script to run.
log2 <- read.csv("~/Desktop/NCU/DissertationDataSets2/log2.csv")
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only printed a stray "NULL" after the listing.
str(log2)
## 'data.frame': 65532 obs. of 12 variables:
## $ Source.Port : int 57222 56258 6881 50553 50002 51465 60513 50049 52244 50627 ...
## $ Destination.Port : int 53 3389 50321 3389 443 443 47094 443 58774 443 ...
## $ NAT.Source.Port : int 54587 56258 43265 50553 45848 39975 45469 21285 2211 16215 ...
## $ NAT.Destination.Port: int 53 3389 50321 3389 443 443 47094 443 58774 443 ...
## $ Action : chr "allow" "allow" "allow" "allow" ...
## $ Bytes : int 177 4768 238 3327 25358 3961 320 7912 70 8256 ...
## $ Bytes.Sent : int 94 1600 118 1438 6778 1595 140 3269 70 1674 ...
## $ Bytes.Received : int 83 3168 120 1889 18580 2366 180 4643 0 6582 ...
## $ Packets : int 2 19 2 15 31 21 6 23 1 31 ...
## $ Elapsed.Time..sec. : int 30 17 1199 17 16 16 7 96 5 75 ...
## $ pkts_sent : int 1 10 1 8 13 12 3 12 1 15 ...
## $ pkts_received : int 1 9 1 7 18 9 3 11 0 16 ...
## NULL
# Print per-column summary statistics for the loaded data frame (quantiles and
# mean for numeric columns; length/class/mode for character columns).
summary(log2)
## Source.Port Destination.Port NAT.Source.Port NAT.Destination.Port
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:49183 1st Qu.: 80 1st Qu.: 0 1st Qu.: 0
## Median :53776 Median : 445 Median : 8820 Median : 53
## Mean :49392 Mean :10577 Mean :19283 Mean : 2671
## 3rd Qu.:58638 3rd Qu.:15000 3rd Qu.:38366 3rd Qu.: 443
## Max. :65534 Max. :65535 Max. :65535 Max. :65535
## Action Bytes Bytes.Sent Bytes.Received
## Length:65532 Min. :6.000e+01 Min. : 60 Min. : 0
## Class :character 1st Qu.:6.600e+01 1st Qu.: 66 1st Qu.: 0
## Mode :character Median :1.680e+02 Median : 90 Median : 79
## Mean :9.712e+04 Mean : 22386 Mean : 74738
## 3rd Qu.:7.520e+02 3rd Qu.: 210 3rd Qu.: 449
## Max. :1.269e+09 Max. :948477220 Max. :320881795
## Packets Elapsed.Time..sec. pkts_sent pkts_received
## Min. : 1.0 Min. : 0.00 Min. : 1.0 Min. : 0.0
## 1st Qu.: 1.0 1st Qu.: 0.00 1st Qu.: 1.0 1st Qu.: 0.0
## Median : 2.0 Median : 15.00 Median : 1.0 Median : 1.0
## Mean : 102.9 Mean : 65.83 Mean : 41.4 Mean : 61.5
## 3rd Qu.: 6.0 3rd Qu.: 30.00 3rd Qu.: 3.0 3rd Qu.: 2.0
## Max. :1036116.0 Max. :10824.00 Max. :747520.0 Max. :327208.0
##Add Bayesian tests functions
#' Bayesian sign test over a region of practical equivalence (ROPE).
#'
#' A pseudo-observation at 0 is prepended to `diffVector`; the function then
#' reports the fraction of observations lying to the left of the ROPE, inside
#' it, and to the right of it.
#'
#' @param diffVector Numeric vector of differences.
#' @param rope_min Lower bound of the ROPE.
#' @param rope_max Upper bound of the ROPE.
#' @return A list with elements `probLeft`, `probRope` and `probRight`.
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  # Add the fake first observation in 0.
  diffVector <- c(0, diffVector)

  probLeft <- mean(diffVector < rope_min)
  probRight <- mean(diffVector > rope_max)
  # The ROPE is treated as the closed interval [rope_min, rope_max]. With
  # strict inequalities on both sides, a difference exactly equal to a bound
  # was counted in none of the three regions and the probabilities did not
  # sum to 1.
  probRope <- mean(diffVector >= rope_min & diffVector <= rope_max)

  list(
    "probLeft" = probLeft,
    "probRope" = probRope,
    "probRight" = probRight
  )
}
##Create function to conduct Bayesian Signed Rank Test
#' Bayesian signed-rank test over a region of practical equivalence (ROPE).
#'
#' Draws 30000 Dirichlet weight vectors over the observations (weight 0.5 on a
#' pseudo-observation at 0, weight 1 on every element of `diffVector`). For
#' each draw, every ordered pair (i, j) contributes the product of its two
#' weights to one region, chosen by the pair sum d_i + d_j:
#' right if it exceeds 2 * rope_max, otherwise rope if it exceeds
#' 2 * rope_min, otherwise left. The region with the largest total mass wins
#' the draw (ties share the win equally), and the function returns each
#' region's average share of wins.
#'
#' @param diffVector Numeric vector of differences (no missing values).
#' @param rope_min Lower bound of the ROPE.
#' @param rope_max Upper bound of the ROPE.
#' @return A list with elements `winLeft`, `winRope` and `winRight`.
BayesianSignedRank <- function(diffVector, rope_min, rope_max) {
  if (anyNA(c(diffVector, rope_min, rope_max))) {
    stop("diffVector, rope_min and rope_max must not contain missing values")
  }

  samples <- 30000
  # Build the vector 0.5 1 1 ....... 1
  weights <- c(0.5, rep(1, length(diffVector)))
  # Add the fake first observation in 0.
  diffVector <- c(0, diffVector)

  # Called through the namespace so the function does not attach MCMCpack as
  # a side effect.
  sampledWeights <- MCMCpack::rdirichlet(samples, weights)

  # Classify every ordered pair once; the classification does not depend on
  # the sampled weights, so it is hoisted out of the per-sample work.
  pairSums <- outer(diffVector, diffVector, "+")
  inRight <- pairSums > 2 * rope_max
  inRope <- !inRight & pairSums > 2 * rope_min
  inLeft <- !inRight & !inRope

  # For each sampled weight vector w (one row), compute sum_ij w_i w_j M_ij.
  regionMass <- function(mask) {
    rowSums((sampledWeights %*% (mask * 1)) * sampledWeights)
  }
  massRight <- regionMass(inRight)
  massRope <- regionMass(inRope)
  massLeft <- regionMass(inLeft)

  # pmax() returns one of its inputs unchanged, so the equality tests below
  # are exact for the winning region(s).
  best <- pmax(massRight, massRope, massLeft)
  rightWins <- massRight == best
  ropeWins <- massRope == best
  leftWins <- massLeft == best
  winners <- rightWins + ropeWins + leftWins

  list(
    "winLeft" = mean(leftWins / winners),
    "winRope" = mean(ropeWins / winners),
    "winRight" = mean(rightWins / winners)
  )
}
#' Bayesian correlated t-test.
#'
#' `diff_a_b` is the vector of differences between two classifiers on each
#' fold of cross-validation. With 10 runs of 10-fold cross-validation there
#' are 100 results per classifier; both classifiers should have been run on
#' the same folds, and `diff_a_b` is the fold-by-fold difference.
#'
#' @param diff_a_b Numeric vector of fold-by-fold differences.
#' @param rho Correlation of the cross-validation results:
#'   1 / (number of folds).
#' @param rope_min Lower bound of the ROPE.
#' @param rope_max Upper bound of the ROPE.
#' @return A list with elements `left`, `rope` and `right`: the Student-t
#'   probability mass below, inside and above the ROPE.
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  delta <- mean(diff_a_b)
  n <- length(diff_a_b)
  df <- n - 1
  # Scale of the t distribution, inflated by rho to account for the
  # correlation between folds.
  sp <- sd(diff_a_b) * sqrt(1 / n + rho / (1 - rho))

  if (isTRUE(sp == 0)) {
    # Zero spread: all mass sits on `delta`. Dividing by sp would give 0 / 0
    # (NaN) whenever delta coincides with a ROPE bound, so assign the mass
    # directly; a mean on a bound counts as inside the ROPE.
    p.left <- as.numeric(delta < rope_min)
    p.rope <- as.numeric(delta >= rope_min && delta <= rope_max)
  } else {
    p.left <- pt((rope_min - delta) / sp, df)
    p.rope <- pt((rope_max - delta) / sp, df) - p.left
  }

  list("left" = p.left, "rope" = p.rope, "right" = 1 - p.left - p.rope)
}
# Fix the RNG seed so the sample() calls further down draw reproducible rows.
set.seed(16974)
###Prepare the datasets: one-hot encode where necessary, then compute the persistent homology of each dataset.
##One hot encoding for adult dataset
library(caret)
# Build a caret dummy-variable encoder from every column of `adult`.
dummy.adult <- dummyVars(" ~ .", data=adult)
# Apply the encoder to `adult` and store the result as a data frame.
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata=adult))
# Print the structure of the encoded data frame. str() returns NULL, so the
# surrounding head() only adds the trailing NULL seen in the output.
head(str(adult.one_hot_df))
## 'data.frame': 32561 obs. of 110 variables:
## $ V1 : num 39 50 38 53 28 37 49 52 31 42 ...
## $ V2.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 0 0 1 1 1 1 1 0 1 1 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 1 0 0 0 0 0 1 0 0 ...
## $ V2.State.gov : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 77516 83311 215646 234721 338409 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 1 0 0 0 0 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 1 1 0 0 1 0 0 0 0 1 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 1 0 0 0 0 1 0 0 ...
## $ V4.Masters : num 0 0 0 0 0 1 0 0 1 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V5 : num 13 13 9 7 13 14 5 9 14 13 ...
## $ V6.Divorced : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 0 1 0 1 1 1 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V6.Never.married : num 1 0 0 0 0 0 0 0 1 0 ...
## $ V6.Separated : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Adm.clerical : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 1 0 0 0 1 0 1 0 1 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 0 1 0 1 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 1 0 1 0 0 0 1 0 1 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Unmarried : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 1 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 1 1 0 1 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 0 0 1 0 1 1 1 ...
## $ V10.Female : num 0 0 0 0 1 1 1 0 1 0 ...
## $ V10.Male : num 1 1 1 1 0 0 0 1 0 1 ...
## $ V11 : num 2174 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 13 40 40 40 40 16 45 50 40 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Take column 15 of the raw `adult` data and append it to the encoded frame.
adult_df1<-adult[,15]
adult.one_hot_df1<-cbind(adult.one_hot_df,adult_df1)
# Keep columns 1, 11, 28, 64, 65 and 66, which are V1, V3, V5, V11, V12 and
# V13 in the structure listing printed above.
# NOTE(review): the appended adult_df1 column is not part of this selection.
adult.one_hot_df2<-adult.one_hot_df1[,c(1,11,28,64,65,66)]
##Persistent homology of adult dataset
# Draw 1000 rows without replacement from the encoded adult data, to check
# whether a barcode and persistence diagram can be produced at this size.
adult.one_hot_1000_df <- adult.one_hot_df[sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE), ]
# Print the structure of the sample (head() only adds the trailing NULL).
head(str(adult.one_hot_1000_df))
## 'data.frame': 1000 obs. of 110 variables:
## $ V1 : num 33 25 39 21 32 26 20 58 24 63 ...
## $ V2.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 1 1 1 1 1 1 0 0 1 0 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 0 0 0 0 0 0 0 0 1 ...
## $ V2.State.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 176992 105693 234901 198050 134886 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 0 0 0 0 1 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 0 1 0 0 0 1 0 0 0 0 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V4.Masters : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 1 0 0 1 ...
## $ V5 : num 14 13 12 12 9 13 10 7 9 10 ...
## $ V6.Divorced : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 1 0 0 0 1 0 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Never.married : num 0 1 0 1 0 1 1 0 1 0 ...
## $ V6.Separated : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V7.Adm.clerical : num 0 0 1 1 1 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 0 0 1 1 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 1 1 0 0 0 1 0 0 0 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 1 0 0 0 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 0 1 0 1 0 1 0 0 0 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 1 0 ...
## $ V8.Unmarried : num 0 0 1 0 0 0 1 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V10.Female : num 0 1 0 1 1 1 1 0 1 0 ...
## $ V10.Male : num 1 0 1 0 0 0 0 1 0 1 ...
## $ V11 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 40 40 25 40 40 20 16 25 48 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Calculate persistent homology for the 1000-row adult sample.
# NOTE(review): calculate_homology(), plot_barcode() and plot_persist() are
# not defined in this part of the file; presumably they come from a package
# loaded earlier (TDAstats?) - confirm.
phom.adult.one_hot_1000_df <- calculate_homology(adult.one_hot_1000_df)
# Plot the barcode for the adult sample.
plot_barcode(phom.adult.one_hot_1000_df)

# Plot the persistence diagram for the adult sample.
plot_persist(phom.adult.one_hot_1000_df)

##One hot encoding for DryBean dataset
library(caret)
# Build a caret dummy-variable encoder from every column of Dry_Bean_Dataset.
dummy_drybean <- dummyVars(" ~ .", data=Dry_Bean_Dataset)
# Apply the encoder and store the result as a data frame.
dry_bean_dataset_one_hot_df <- data.frame(predict(dummy_drybean, newdata=Dry_Bean_Dataset))
# Draw 1000 rows without replacement from the encoded data.
dry_bean_dataset_one_hot_1000_df <- dry_bean_dataset_one_hot_df[sample(nrow(dry_bean_dataset_one_hot_df), size = 1000, replace = FALSE), ]
# Print the structure of the sample (head() only adds the trailing NULL).
# NOTE(review): the printed structure shows the Class* indicator columns are
# part of this frame, so they are included in whatever is computed from it.
head(str(dry_bean_dataset_one_hot_1000_df))
## 'data.frame': 1000 obs. of 23 variables:
## $ Area : num 39621 38687 36878 29154 69852 ...
## $ Perimeter : num 734 722 723 631 1095 ...
## $ MajorAxisLength: num 256 266 264 234 378 ...
## $ MinorAxisLength: num 197 185 178 159 236 ...
## $ AspectRation : num 1.3 1.44 1.48 1.47 1.6 ...
## $ Eccentricity : num 0.64 0.717 0.738 0.731 0.78 ...
## $ ConvexArea : num 40021 39030 37329 29499 70620 ...
## $ EquivDiameter : num 225 222 217 193 298 ...
## $ Extent : num 0.784 0.804 0.759 0.747 0.769 ...
## $ Solidity : num 0.99 0.991 0.988 0.988 0.989 ...
## $ roundness : num 0.925 0.932 0.887 0.92 0.732 ...
## $ Compactness : num 0.876 0.834 0.821 0.825 0.789 ...
## $ ShapeFactor1 : num 0.00647 0.00688 0.00716 0.00801 0.00541 ...
## $ ShapeFactor2 : num 0.00235 0.00205 0.002 0.00229 0.00129 ...
## $ ShapeFactor3 : num 0.767 0.696 0.674 0.681 0.622 ...
## $ ShapeFactor4 : num 0.999 0.998 0.998 0.998 0.995 ...
## $ ClassBARBUNYA : num 0 0 0 0 1 1 0 0 0 0 ...
## $ ClassBOMBAY : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ClassCALI : num 0 0 0 0 0 0 1 0 0 0 ...
## $ ClassDERMASON : num 0 0 1 1 0 0 0 0 0 0 ...
## $ ClassHOROZ : num 0 0 0 0 0 0 0 1 0 0 ...
## $ ClassSEKER : num 1 0 0 0 0 0 0 0 0 0 ...
## $ ClassSIRA : num 0 1 0 0 0 0 0 0 1 1 ...
## NULL
##Persistent Homology of DryBean dataset
# Calculate persistent homology for the 1000-row DryBean sample.
phom_drybean_df <- calculate_homology(dry_bean_dataset_one_hot_1000_df)
# Plot the barcode for the DryBean sample.
plot_barcode(phom_drybean_df)

# Plot the persistence diagram for the DryBean sample.
plot_persist(phom_drybean_df)

##Persistent Homology of Taiwanese Bankruptcy dataset
# Copy the object named `data` under a descriptive name.
# NOTE(review): presumably `data` holds the Taiwanese bankruptcy table loaded
# earlier in the file - confirm. No one-hot encoding or subsampling is done
# in this section.
taiwanese_data<-data
# Calculate persistent homology for the Taiwanese Bankruptcy dataset.
phom_taiwanese_data_df <- calculate_homology(taiwanese_data)
# Plot the barcode for the Taiwanese Bankruptcy dataset.
plot_barcode(phom_taiwanese_data_df)

# Plot the persistence diagram for the Taiwanese Bankruptcy dataset.
plot_persist(phom_taiwanese_data_df)

##One hot encoding for HTRU_2 dataset
library(caret)
# Build a caret dummy-variable encoder from every column of HTRU_2.
dummy_HTRU_2<- dummyVars(" ~ .", data=HTRU_2)
# Apply the encoder and store the result as a data frame.
HTRU_2.one_hot_df <- data.frame(predict(dummy_HTRU_2, newdata=HTRU_2))
# Print the structure of the encoded data frame (head() only adds the
# trailing NULL). The listing shows nine numeric columns, V1 to V9.
head(str(HTRU_2.one_hot_df))
## 'data.frame': 17898 obs. of 9 variables:
## $ V1: num 140.6 102.5 103 136.8 88.7 ...
## $ V2: num 55.7 58.9 39.3 57.2 40.7 ...
## $ V3: num -0.2346 0.4653 0.3233 -0.0684 0.6009 ...
## $ V4: num -0.7 -0.515 1.051 -0.636 1.123 ...
## $ V5: num 3.2 1.68 3.12 3.64 1.18 ...
## $ V6: num 19.1 14.9 21.7 21 11.5 ...
## $ V7: num 7.98 10.58 7.74 6.9 14.27 ...
## $ V8: num 74.2 127.4 63.2 53.6 252.6 ...
## $ V9: num 0 0 0 0 0 0 0 0 0 0 ...
## NULL
##Persistent Homology of HTRU_2 dataset
# Calculate persistent homology for the HTRU_2 dataset.
# NOTE(review): this uses the raw HTRU_2 object, not HTRU_2.one_hot_df, and
# no subsample is drawn here, unlike the adult and DryBean sections.
phom_HTRU_2_data_df <- calculate_homology(HTRU_2)
# Plot the barcode for the HTRU_2 dataset.
plot_barcode(phom_HTRU_2_data_df)

# Plot the persistence diagram for the HTRU_2 dataset.
plot_persist(phom_HTRU_2_data_df)

##One hot encoding for in.vehicle.coupon.recommendation dataset
library(caret)
# Build a caret dummy-variable encoder from every column of the coupon data.
dummy_in.vehicle.coupon.recommendation<- dummyVars(" ~ .", data=in.vehicle.coupon.recommendation)
# Apply the encoder and store the result as a data frame.
in.vehicle.coupon.recommendation_one_hot_df <- data.frame(predict(dummy_in.vehicle.coupon.recommendation, newdata=in.vehicle.coupon.recommendation))
# Print the structure of the encoded data frame (head() only adds the
# trailing NULL).
head(str(in.vehicle.coupon.recommendation_one_hot_df))
## 'data.frame': 12684 obs. of 121 variables:
## $ destinationHome : num 0 0 0 0 0 0 0 0 0 0 ...
## $ destinationNo.Urgent.Place : num 1 1 1 1 1 1 1 1 1 1 ...
## $ destinationWork : num 0 0 0 0 0 0 0 0 0 0 ...
## $ passangerAlone : num 1 0 0 0 0 0 0 0 0 0 ...
## $ passangerFriend.s. : num 0 1 1 1 1 1 1 0 0 0 ...
## $ passangerKid.s. : num 0 0 0 0 0 0 0 1 1 1 ...
## $ passangerPartner : num 0 0 0 0 0 0 0 0 0 0 ...
## $ weatherRainy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ weatherSnowy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ weatherSunny : num 1 1 1 1 1 1 1 1 1 1 ...
## $ temperature : num 55 80 80 80 80 80 55 80 80 80 ...
## $ time10AM : num 0 1 1 0 0 0 0 1 1 1 ...
## $ time10PM : num 0 0 0 0 0 0 0 0 0 0 ...
## $ time2PM : num 1 0 0 1 1 0 1 0 0 0 ...
## $ time6PM : num 0 0 0 0 0 1 0 0 0 0 ...
## $ time7AM : num 0 0 0 0 0 0 0 0 0 0 ...
## $ couponBar : num 0 0 0 0 0 0 0 0 0 1 ...
## $ couponCarry.out...Take.away : num 0 0 1 0 0 0 1 0 1 0 ...
## $ couponCoffee.House : num 0 1 0 1 1 0 0 0 0 0 ...
## $ couponRestaurant..20. : num 1 0 0 0 0 1 0 1 0 0 ...
## $ couponRestaurant.20.50. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ expiration1d : num 1 0 0 0 1 0 1 0 0 1 ...
## $ expiration2h : num 0 1 1 1 0 1 0 1 1 0 ...
## $ genderFemale : num 1 1 1 1 1 1 1 1 1 1 ...
## $ genderMale : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age21 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ age26 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age31 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age36 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age41 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age46 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age50plus : num 0 0 0 0 0 0 0 0 0 0 ...
## $ agebelow21 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ maritalStatusDivorced : num 0 0 0 0 0 0 0 0 0 0 ...
## $ maritalStatusMarried.partner : num 0 0 0 0 0 0 0 0 0 0 ...
## $ maritalStatusSingle : num 0 0 0 0 0 0 0 0 0 0 ...
## $ maritalStatusUnmarried.partner : num 1 1 1 1 1 1 1 1 1 1 ...
## $ maritalStatusWidowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ has_children : num 1 1 1 1 1 1 1 1 1 1 ...
## $ educationAssociates.degree : num 0 0 0 0 0 0 0 0 0 0 ...
## $ educationBachelors.degree : num 0 0 0 0 0 0 0 0 0 0 ...
## $ educationGraduate.degree..Masters.or.Doctorate. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ educationHigh.School.Graduate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ educationSome.college...no.degree : num 1 1 1 1 1 1 1 1 1 1 ...
## $ educationSome.High.School : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationArchitecture...Engineering : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationArts.Design.Entertainment.Sports...Media : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationBuilding...Grounds.Cleaning...Maintenance: num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationBusiness...Financial : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationCommunity...Social.Services : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationComputer...Mathematical : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationConstruction...Extraction : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationEducation.Training.Library : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationFarming.Fishing...Forestry : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationFood.Preparation...Serving.Related : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationHealthcare.Practitioners...Technical : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationHealthcare.Support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationInstallation.Maintenance...Repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationLegal : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationLife.Physical.Social.Science : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationManagement : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationOffice...Administrative.Support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationPersonal.Care...Service : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationProduction.Occupations : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationProtective.Service : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationRetired : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationSales...Related : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationStudent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationTransportation...Material.Moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationUnemployed : num 1 1 1 1 1 1 1 1 1 1 ...
## $ income.100000.or.More : num 0 0 0 0 0 0 0 0 0 0 ...
## $ income.12500....24999 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ income.25000....37499 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ income.37500....49999 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ income.50000....62499 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ income.62500....74999 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ income.75000....87499 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ income.87500....99999 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ incomeLess.than..12500 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ car : num 1 1 1 1 1 1 1 1 1 1 ...
## $ carCar.that.is.too.old.to.install.Onstar..D : num 0 0 0 0 0 0 0 0 0 0 ...
## $ carcrossover : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cardo.not.drive : num 0 0 0 0 0 0 0 0 0 0 ...
## $ carMazda5 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ carScooter.and.motorcycle : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bar : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bar1.3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bar4.8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bargt8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Barless1 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Barnever : num 1 1 1 1 1 1 1 1 1 1 ...
## $ CoffeeHouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CoffeeHouse1.3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CoffeeHouse4.8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CoffeeHousegt8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CoffeeHouseless1 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CoffeeHousenever : num 1 1 1 1 1 1 1 1 1 1 ...
## $ CarryAway : num 1 1 1 1 1 1 1 1 1 1 ...
## $ CarryAway1.3 : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Draw 100 rows without replacement from the encoded coupon data.
in.vehicle.coupon.recommendation_one_hot_100_df <- in.vehicle.coupon.recommendation_one_hot_df[sample(nrow(in.vehicle.coupon.recommendation_one_hot_df), size = 100, replace = FALSE), ]
# Print the structure of the sample (head() only adds the trailing NULL).
head(str(in.vehicle.coupon.recommendation_one_hot_100_df))
## 'data.frame': 100 obs. of 121 variables:
## $ destinationHome : num 0 0 0 0 0 1 0 0 0 0 ...
## $ destinationNo.Urgent.Place : num 1 1 0 1 1 0 1 0 1 1 ...
## $ destinationWork : num 0 0 1 0 0 0 0 1 0 0 ...
## $ passangerAlone : num 1 0 1 0 0 1 0 1 1 1 ...
## $ passangerFriend.s. : num 0 1 0 1 1 0 0 0 0 0 ...
## $ passangerKid.s. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ passangerPartner : num 0 0 0 0 0 0 1 0 0 0 ...
## $ weatherRainy : num 0 0 1 0 0 0 0 0 0 0 ...
## $ weatherSnowy : num 0 0 0 0 0 1 0 1 1 0 ...
## $ weatherSunny : num 1 1 0 1 1 0 1 0 0 1 ...
## $ temperature : num 80 80 55 80 80 30 80 30 30 80 ...
## $ time10AM : num 1 1 0 1 1 0 1 0 0 0 ...
## $ time10PM : num 0 0 0 0 0 1 0 0 0 1 ...
## $ time2PM : num 0 0 0 0 0 0 0 0 1 0 ...
## $ time6PM : num 0 0 0 0 0 0 0 0 0 0 ...
## $ time7AM : num 0 0 1 0 0 0 0 1 0 0 ...
## $ couponBar : num 0 0 0 1 0 0 0 0 1 0 ...
## $ couponCarry.out...Take.away : num 0 0 1 0 1 0 0 0 0 0 ...
## $ couponCoffee.House : num 1 1 0 0 0 1 1 0 0 0 ...
## $ couponRestaurant..20. : num 0 0 0 0 0 0 0 1 0 1 ...
## $ couponRestaurant.20.50. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ expiration1d : num 1 0 1 1 0 0 0 0 1 1 ...
## $ expiration2h : num 0 1 0 0 1 1 1 1 0 0 ...
## $ genderFemale : num 0 0 0 0 0 1 0 0 0 1 ...
## $ genderMale : num 1 1 1 1 1 0 1 1 1 0 ...
## $ age21 : num 1 0 1 0 1 0 0 0 0 0 ...
## $ age26 : num 0 0 0 0 0 1 1 0 0 0 ...
## $ age31 : num 0 1 0 0 0 0 0 1 1 0 ...
## $ age36 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age41 : num 0 0 0 0 0 0 0 0 0 1 ...
## $ age46 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age50plus : num 0 0 0 1 0 0 0 0 0 0 ...
## $ agebelow21 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ maritalStatusDivorced : num 0 0 0 0 0 0 0 0 0 0 ...
## $ maritalStatusMarried.partner : num 0 0 0 0 0 0 0 0 1 1 ...
## $ maritalStatusSingle : num 1 1 0 1 1 1 0 1 0 0 ...
## $ maritalStatusUnmarried.partner : num 0 0 1 0 0 0 1 0 0 0 ...
## $ maritalStatusWidowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ has_children : num 0 1 0 0 0 0 0 0 1 1 ...
## $ educationAssociates.degree : num 0 0 0 0 0 0 0 0 0 0 ...
## $ educationBachelors.degree : num 1 0 0 1 0 1 0 0 1 0 ...
## $ educationGraduate.degree..Masters.or.Doctorate. : num 0 0 0 0 0 0 0 0 0 1 ...
## $ educationHigh.School.Graduate : num 0 1 1 0 0 0 0 0 0 0 ...
## $ educationSome.college...no.degree : num 0 0 0 0 1 0 1 1 0 0 ...
## $ educationSome.High.School : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationArchitecture...Engineering : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationArts.Design.Entertainment.Sports...Media : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationBuilding...Grounds.Cleaning...Maintenance: num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationBusiness...Financial : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationCommunity...Social.Services : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationComputer...Mathematical : num 0 0 0 1 0 0 0 0 1 0 ...
## $ occupationConstruction...Extraction : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationEducation.Training.Library : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationFarming.Fishing...Forestry : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationFood.Preparation...Serving.Related : num 0 0 1 0 0 0 0 0 0 1 ...
## $ occupationHealthcare.Practitioners...Technical : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationHealthcare.Support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationInstallation.Maintenance...Repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationLegal : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationLife.Physical.Social.Science : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationManagement : num 1 1 0 0 0 0 0 0 0 0 ...
## $ occupationOffice...Administrative.Support : num 0 0 0 0 0 1 0 0 0 0 ...
## $ occupationPersonal.Care...Service : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationProduction.Occupations : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationProtective.Service : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationRetired : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationSales...Related : num 0 0 0 0 0 0 1 0 0 0 ...
## $ occupationStudent : num 0 0 0 0 1 0 0 0 0 0 ...
## $ occupationTransportation...Material.Moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ occupationUnemployed : num 0 0 0 0 0 0 0 1 0 0 ...
## $ income.100000.or.More : num 0 0 0 0 0 0 0 0 0 1 ...
## $ income.12500....24999 : num 0 0 1 0 1 1 0 1 0 0 ...
## $ income.25000....37499 : num 1 0 0 0 0 0 0 0 0 0 ...
## $ income.37500....49999 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ income.50000....62499 : num 0 1 0 0 0 0 1 0 0 0 ...
## $ income.62500....74999 : num 0 0 0 0 0 0 0 0 1 0 ...
## $ income.75000....87499 : num 0 0 0 1 0 0 0 0 0 0 ...
## $ income.87500....99999 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ incomeLess.than..12500 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ car : num 1 1 1 1 1 1 1 1 1 1 ...
## $ carCar.that.is.too.old.to.install.Onstar..D : num 0 0 0 0 0 0 0 0 0 0 ...
## $ carcrossover : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cardo.not.drive : num 0 0 0 0 0 0 0 0 0 0 ...
## $ carMazda5 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ carScooter.and.motorcycle : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bar : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bar1.3 : num 0 0 1 1 0 0 1 1 0 0 ...
## $ Bar4.8 : num 1 0 0 0 0 0 0 0 0 0 ...
## $ Bargt8 : num 0 1 0 0 0 0 0 0 0 0 ...
## $ Barless1 : num 0 0 0 0 1 1 0 0 0 1 ...
## $ Barnever : num 0 0 0 0 0 0 0 0 1 0 ...
## $ CoffeeHouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CoffeeHouse1.3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CoffeeHouse4.8 : num 0 0 0 0 0 0 1 0 0 0 ...
## $ CoffeeHousegt8 : num 0 1 0 0 0 0 0 0 0 1 ...
## $ CoffeeHouseless1 : num 0 0 0 0 1 1 0 1 1 0 ...
## $ CoffeeHousenever : num 1 0 1 1 0 0 0 0 0 0 ...
## $ CarryAway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CarryAway1.3 : num 0 0 0 1 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
##Persistent Homology of in.vehicle.coupon.recommendation dataset
# Calculate persistent homology for the 100-row sample of the encoded
# in.vehicle.coupon.recommendation dataset.
phom_in.vehicle.coupon.recommendation_one_hot_100_df_data_df <- calculate_homology(in.vehicle.coupon.recommendation_one_hot_100_df)
# Plot the barcode for the 100-row coupon sample.
plot_barcode(phom_in.vehicle.coupon.recommendation_one_hot_100_df_data_df)

# Plot the persistence diagram for the 100-row coupon sample.
plot_persist(phom_in.vehicle.coupon.recommendation_one_hot_100_df_data_df)

##One hot encoding for RT_IOT2022 dataset
library(caret)
# Build a caret dummy-variable encoder from every column of RT_IOT2022.
dummy_RT_IOT2022<- dummyVars(" ~ .", data=RT_IOT2022)
# Apply the encoder and store the result as a data frame.
RT_IOT2022_one_hot_df <- data.frame(predict(dummy_RT_IOT2022, newdata=RT_IOT2022))
# Print the structure of the encoded data frame (head() only adds the
# trailing NULL).
# NOTE(review): the listing starts with a column X holding 0, 1, 2, ...;
# it looks like a row index carried through the encoding - confirm whether
# it should be dropped.
head(str(RT_IOT2022_one_hot_df))
## 'data.frame': 123117 obs. of 107 variables:
## $ X : num 0 1 2 3 4 5 6 7 8 9 ...
## $ id.orig_p : num 38667 51143 44761 60893 51087 ...
## $ id.resp_p : num 1883 1883 1883 1883 1883 ...
## $ protoicmp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ prototcp : num 1 1 1 1 1 1 1 1 1 1 ...
## $ protoudp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ service. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicedhcp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicedns : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicehttp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ serviceirc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicemqtt : num 1 1 1 1 1 1 1 1 1 1 ...
## $ servicentp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ serviceradius : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicessh : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicessl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_duration : num 32 31.9 32.1 32 31.9 ...
## $ fwd_pkts_tot : num 9 9 9 9 9 9 9 9 9 9 ...
## $ bwd_pkts_tot : num 5 5 5 5 5 5 5 5 5 5 ...
## $ fwd_data_pkts_tot : num 3 3 3 3 3 3 3 3 3 3 ...
## $ bwd_data_pkts_tot : num 3 3 3 3 3 3 3 3 3 3 ...
## $ fwd_pkts_per_sec : num 0.281 0.282 0.28 0.282 0.282 ...
## $ bwd_pkts_per_sec : num 0.156 0.157 0.156 0.156 0.157 ...
## $ flow_pkts_per_sec : num 0.437 0.439 0.436 0.438 0.439 ...
## $ down_up_ratio : num 0.556 0.556 0.556 0.556 0.556 ...
## $ fwd_header_size_tot : num 296 296 296 296 296 296 296 296 296 296 ...
## $ fwd_header_size_min : num 32 32 32 32 32 32 32 32 32 32 ...
## $ fwd_header_size_max : num 40 40 40 40 40 40 40 40 40 40 ...
## $ bwd_header_size_tot : num 168 168 168 168 168 168 168 168 168 168 ...
## $ bwd_header_size_min : num 32 32 32 32 32 32 32 32 32 32 ...
## $ bwd_header_size_max : num 40 40 40 40 40 40 40 40 40 40 ...
## $ flow_FIN_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_SYN_flag_count : num 2 2 2 2 2 2 2 2 2 2 ...
## $ flow_RST_flag_count : num 1 1 1 1 1 1 1 1 1 1 ...
## $ fwd_PSH_flag_count : num 3 3 3 3 3 3 3 3 3 3 ...
## $ bwd_PSH_flag_count : num 3 3 3 3 3 3 3 3 3 3 ...
## $ flow_ACK_flag_count : num 13 13 13 13 13 13 13 13 13 13 ...
## $ fwd_URG_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_URG_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_CWR_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_ECE_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_pkts_payload.min : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_pkts_payload.max : num 33 33 33 33 33 33 33 33 33 33 ...
## $ fwd_pkts_payload.tot : num 76 76 74 74 76 76 76 76 76 76 ...
## $ fwd_pkts_payload.avg : num 8.44 8.44 8.22 8.22 8.44 ...
## $ fwd_pkts_payload.std : num 13.1 13.1 12.9 12.9 13.1 ...
## $ bwd_pkts_payload.min : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_pkts_payload.max : num 23 23 21 21 23 23 23 23 23 23 ...
## $ bwd_pkts_payload.tot : num 32 32 30 30 32 32 32 32 32 32 ...
## $ bwd_pkts_payload.avg : num 6.4 6.4 6 6 6.4 6.4 6.4 6.4 6.4 6.4 ...
## $ bwd_pkts_payload.std : num 9.56 9.56 8.69 8.69 9.56 ...
## $ flow_pkts_payload.min : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_pkts_payload.max : num 33 33 33 33 33 33 33 33 33 33 ...
## $ flow_pkts_payload.tot : num 108 108 104 104 108 108 108 108 108 108 ...
## $ flow_pkts_payload.avg : num 7.71 7.71 7.43 7.43 7.71 ...
## $ flow_pkts_payload.std : num 11.6 11.6 11.2 11.2 11.6 ...
## $ fwd_iat.min : num 762 247 284 289 388 ...
## $ fwd_iat.max : num 29729183 29855277 29842149 29913775 29814705 ...
## $ fwd_iat.tot : num 32011598 31883584 32124053 31961063 31902362 ...
## $ fwd_iat.avg : num 4001450 3985448 4015507 3995133 3987795 ...
## $ fwd_iat.std : num 10403074 10463456 10442378 10482528 10447019 ...
## $ bwd_iat.min : num 4439 4214 2457 3934 3005 ...
## $ bwd_iat.max : num 1511694 1576436 1476049 1551892 1632083 ...
## $ bwd_iat.tot : num 2026391 1876261 2013770 1883784 1935984 ...
## $ bwd_iat.avg : num 506598 469065 503442 470946 483996 ...
## $ bwd_iat.std : num 680406 741352 660344 724569 768543 ...
## $ flow_iat.min : num 762 247 284 289 388 ...
## $ flow_iat.max : num 29729183 29855277 29842149 29913775 29814705 ...
## $ flow_iat.tot : num 32011598 31883584 32124053 31961063 31902362 ...
## $ flow_iat.avg : num 2462431 2452583 2471081 2458543 2454028 ...
## $ flow_iat.std : num 8199747 8242459 8230593 8257786 8230584 ...
## $ payload_bytes_per_second : num 3.37 3.39 3.24 3.25 3.39 ...
## $ fwd_subflow_pkts : num 3 3 3 3 3 3 3 3 3 3 ...
## $ bwd_subflow_pkts : num 1.67 1.67 1.67 1.67 1.67 ...
## $ fwd_subflow_bytes : num 25.3 25.3 24.7 24.7 25.3 ...
## $ bwd_subflow_bytes : num 10.7 10.7 10 10 10.7 ...
## $ fwd_bulk_bytes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_bytes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_bulk_packets : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_packets : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_bulk_rate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_rate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ active.min : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.max : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.tot : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.avg : num 2282415 2028307 2281904 2047288 2087657 ...
## $ active.std : num 0 0 0 0 0 0 0 0 0 0 ...
## $ idle.min : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.max : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.tot : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.avg : num 29729183 29855277 29842149 29913775 29814705 ...
## $ idle.std : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_init_window_size : num 64240 64240 64240 64240 64240 ...
## $ bwd_init_window_size : num 26847 26847 26847 26847 26847 ...
## $ fwd_last_window_size : num 502 502 502 502 502 502 502 502 502 502 ...
## $ Attack_typeARP_poisioning : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Attack_typeDDOS_Slowloris : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Attack_typeDOS_SYN_Hping : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Attack_typeMetasploit_Brute_Force_SSH: num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Keep 1000 distinct rows of the encoded RT_IOT2022 data, chosen at random.
# NOTE(review): no set.seed() is visible in this part of the file, so the
# sample presumably differs between runs -- confirm whether a seed is set
# earlier.
RT_IOT2022_one_hot_df_1000 <- RT_IOT2022_one_hot_df[
  sample(nrow(RT_IOT2022_one_hot_df), size = 1000, replace = FALSE),
]
# Print the structure of the sample. str() returns NULL invisibly, so it is
# called directly rather than through head(), which only printed a stray
# "NULL".
str(RT_IOT2022_one_hot_df_1000)
## 'data.frame': 1000 obs. of 107 variables:
## $ X : num 2139 70389 70221 71870 86340 ...
## $ id.orig_p : num 33037 60889 60822 61663 1698 ...
## $ id.resp_p : num 1883 21 21 21 21 ...
## $ protoicmp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ prototcp : num 1 1 1 1 1 1 1 1 1 1 ...
## $ protoudp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ service. : num 0 1 1 1 1 1 1 0 1 1 ...
## $ servicedhcp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicedns : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicehttp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ serviceirc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicemqtt : num 1 0 0 0 0 0 0 0 0 0 ...
## $ servicentp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ serviceradius : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicessh : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servicessl : num 0 0 0 0 0 0 0 1 0 0 ...
## $ flow_duration : num 4.19e+01 0.00 0.00 1.00e-06 1.00e-06 ...
## $ fwd_pkts_tot : num 10 1 1 1 1 1 1 14 1 1 ...
## $ bwd_pkts_tot : num 6 0 0 1 1 1 0 14 1 1 ...
## $ fwd_data_pkts_tot : num 3 1 1 1 1 1 1 5 0 1 ...
## $ bwd_data_pkts_tot : num 4 0 0 0 0 0 0 6 0 0 ...
## $ fwd_pkts_per_sec : num 2.39e-01 0.00 0.00 1.05e+06 8.39e+05 ...
## $ bwd_pkts_per_sec : num 1.43e-01 0.00 0.00 1.05e+06 8.39e+05 ...
## $ flow_pkts_per_sec : num 3.82e-01 0.00 0.00 2.10e+06 1.68e+06 ...
## $ down_up_ratio : num 0.6 0 0 1 1 1 0 1 1 1 ...
## $ fwd_header_size_tot : num 328 20 20 20 20 20 20 300 20 20 ...
## $ fwd_header_size_min : num 32 20 20 20 20 20 20 20 20 20 ...
## $ fwd_header_size_max : num 40 20 20 20 20 20 20 40 20 20 ...
## $ bwd_header_size_tot : num 200 0 0 20 20 20 0 292 20 20 ...
## $ bwd_header_size_min : num 32 0 0 20 20 20 0 20 20 20 ...
## $ bwd_header_size_max : num 40 0 0 20 20 20 0 32 20 20 ...
## $ flow_FIN_flag_count : num 0 0 0 0 0 0 0 2 1 0 ...
## $ flow_SYN_flag_count : num 2 1 1 1 1 1 1 2 0 1 ...
## $ flow_RST_flag_count : num 1 0 0 1 1 1 0 2 1 1 ...
## $ fwd_PSH_flag_count : num 3 0 0 0 0 0 0 5 1 0 ...
## $ bwd_PSH_flag_count : num 4 0 0 0 0 0 0 3 0 0 ...
## $ flow_ACK_flag_count : num 15 0 0 1 1 1 0 25 1 1 ...
## $ fwd_URG_flag_count : num 0 0 0 0 0 0 0 0 1 0 ...
## $ bwd_URG_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_CWR_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ flow_ECE_flag_count : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_pkts_payload.min : num 0 120 120 120 120 120 120 0 0 120 ...
## $ fwd_pkts_payload.max : num 33 120 120 120 120 120 120 517 0 120 ...
## $ fwd_pkts_payload.tot : num 77 120 120 120 120 ...
## $ fwd_pkts_payload.avg : num 7.7 120 120 120 120 ...
## $ fwd_pkts_payload.std : num 12.8 0 0 0 0 ...
## $ bwd_pkts_payload.min : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_pkts_payload.max : num 22 0 0 0 0 ...
## $ bwd_pkts_payload.tot : num 35 0 0 0 0 ...
## $ bwd_pkts_payload.avg : num 5.83 0 0 0 0 ...
## $ bwd_pkts_payload.std : num 8.21 0 0 0 0 ...
## $ flow_pkts_payload.min : num 0 120 120 0 0 0 120 0 0 0 ...
## $ flow_pkts_payload.max : num 33 120 120 120 120 ...
## $ flow_pkts_payload.tot : num 112 120 120 120 120 ...
## $ flow_pkts_payload.avg : num 7 120 120 60 60 ...
## $ flow_pkts_payload.std : num 11 0 0 84.9 84.9 ...
## $ fwd_iat.min : num 241 0 0 0 0 ...
## $ fwd_iat.max : num 39889886 0 0 0 0 ...
## $ fwd_iat.tot : num 41910572 0 0 0 0 ...
## $ fwd_iat.avg : num 4656730 0 0 0 0 ...
## $ fwd_iat.std : num 13219944 0 0 0 0 ...
## $ bwd_iat.min : num 77 0 0 0 0 ...
## $ bwd_iat.max : num 1567814 0 0 0 0 ...
## $ bwd_iat.tot : num 1871896 0 0 0 0 ...
## $ bwd_iat.avg : num 374379 0 0 0 0 ...
## $ bwd_iat.std : num 671237 0 0 0 0 ...
## $ flow_iat.min : num 77.009 0 0 0.954 1.192 ...
## $ flow_iat.max : num 3.99e+07 0.00 0.00 9.54e-01 1.19 ...
## $ flow_iat.tot : num 4.19e+07 0.00 0.00 9.54e-01 1.19 ...
## $ flow_iat.avg : num 2.79e+06 0.00 0.00 9.54e-01 1.19 ...
## $ flow_iat.std : num 10268536 0 0 0 0 ...
## $ payload_bytes_per_second : num 2.67 0.00 0.00 1.26e+08 1.01e+08 ...
## $ fwd_subflow_pkts : num 3.33 1 1 1 1 ...
## $ bwd_subflow_pkts : num 2 0 0 1 1 1 0 14 1 1 ...
## $ fwd_subflow_bytes : num 25.7 120 120 120 120 ...
## $ bwd_subflow_bytes : num 11.7 0 0 0 0 ...
## $ fwd_bulk_bytes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_bytes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_bulk_packets : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_packets : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_bulk_rate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bwd_bulk_rate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ active.min : num 2.02e+06 0.00 0.00 9.54e-01 1.19 ...
## $ active.max : num 2.02e+06 0.00 0.00 9.54e-01 1.19 ...
## $ active.tot : num 2.02e+06 0.00 0.00 9.54e-01 1.19 ...
## $ active.avg : num 2.02e+06 0.00 0.00 9.54e-01 1.19 ...
## $ active.std : num 0 0 0 0 0 0 0 0 0 0 ...
## $ idle.min : num 39889886 0 0 0 0 ...
## $ idle.max : num 39889886 0 0 0 0 ...
## $ idle.tot : num 39889886 0 0 0 0 ...
## $ idle.avg : num 39889886 0 0 0 0 ...
## $ idle.std : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fwd_init_window_size : num 64240 64 64 64 64 ...
## $ bwd_init_window_size : num 26847 0 0 0 0 ...
## $ fwd_last_window_size : num 502 64 64 64 64 ...
## $ Attack_typeARP_poisioning : num 0 0 0 0 0 0 0 1 0 0 ...
## $ Attack_typeDDOS_Slowloris : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Attack_typeDOS_SYN_Hping : num 0 1 1 1 1 1 1 0 0 1 ...
## $ Attack_typeMetasploit_Brute_Force_SSH: num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
##Persistent Homology of 1000 sampled points of the RT_IOT2022_one_hot_df dataset
# Pass the 1000-row RT_IOT2022 sample to calculate_homology() and keep the
# result for the two plots below.
phom_RT_IOT2022_1000_data_df <-
  RT_IOT2022_one_hot_df_1000 |>
  calculate_homology()
# Barcode plot of the homology result.
phom_RT_IOT2022_1000_data_df |>
  plot_barcode()

# Persistence diagram of the same homology result.
phom_RT_IOT2022_1000_data_df |>
  plot_persist()

##Persistent Homology of PP_URL_Culled Dataset
# Drop the columns at positions 1, 2, 4, 7 and 30 of the phishing URL data;
# the original note describes them as useless variables that prevent the
# persistent homology computation.
PP_URL_Culled <- PhiUSIIL_Phishing_URL_Dataset[, -c(1, 2, 4, 7, 30)]
# Keep 1000 distinct rows of the culled data, chosen at random.
PP_URL_Culled_1000 <- PP_URL_Culled[
  sample(nrow(PP_URL_Culled), size = 1000, replace = FALSE),
]
# Print the structure of the sample. str() returns NULL invisibly, so it is
# called directly rather than through head(), which only printed a stray
# "NULL".
str(PP_URL_Culled_1000)
## 'data.frame': 1000 obs. of 51 variables:
## $ URLLength : int 35 32 41 62 20 25 30 27 35 37 ...
## $ DomainLength : int 26 25 24 34 13 18 23 20 28 29 ...
## $ IsDomainIP : int 0 0 0 0 0 0 0 0 0 0 ...
## $ URLSimilarityIndex : num 69.5 100 39.9 24.3 100 ...
## $ CharContinuationRate : num 0.591 1 0.857 0.258 1 ...
## $ TLDLegitimateProb : num 0.52291 0.52291 0.00641 0.01293 0.00598 ...
## $ URLCharProb : num 0.0533 0.0631 0.0596 0.0589 0.0495 ...
## $ TLDLength : int 3 3 2 2 2 3 2 2 3 3 ...
## $ NoOfSubDomain : int 1 1 1 1 1 1 2 1 1 1 ...
## $ HasObfuscation : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfObfuscatedChar : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ObfuscationRatio : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfLettersInURL : int 23 19 29 46 7 12 16 13 22 21 ...
## $ LetterRatioInURL : num 0.657 0.594 0.707 0.742 0.35 0.48 0.533 0.481 0.629 0.568 ...
## $ NoOfDegitsInURL : int 1 0 0 0 0 0 0 0 0 5 ...
## $ DegitRatioInURL : num 0.029 0 0 0 0 0 0 0 0 0.135 ...
## $ NoOfEqualsInURL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfQMarkInURL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfAmpersandInURL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfOtherSpecialCharsInURL: int 3 1 4 8 1 1 2 2 1 3 ...
## $ SpacialCharRatioInURL : num 0.086 0.031 0.098 0.129 0.05 0.04 0.067 0.074 0.029 0.081 ...
## $ IsHTTPS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LineOfCode : int 228 1521 16 126 238 540 1164 236 2752 48 ...
## $ LargestLineLength : int 77763 10936 286 108 181 1350 16009 434 9381 710 ...
## $ HasTitle : int 1 1 1 1 1 1 1 1 1 0 ...
## $ DomainTitleMatchScore : num 0 100 0 0 100 100 0 100 100 0 ...
## $ URLTitleMatchScore : num 0 100 0 0 100 100 0 100 100 0 ...
## $ HasFavicon : int 0 1 0 0 1 0 0 1 0 0 ...
## $ Robots : int 0 1 0 0 0 0 1 1 1 0 ...
## $ IsResponsive : int 0 1 1 1 1 1 1 1 1 0 ...
## $ NoOfURLRedirect : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfSelfRedirect : int 0 0 0 0 0 0 0 0 0 0 ...
## $ HasDescription : int 0 1 0 0 1 0 0 0 1 0 ...
## $ NoOfPopup : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NoOfiFrame : int 2 12 0 0 0 3 4 1 0 0 ...
## $ HasExternalFormSubmit : int 0 0 0 0 0 0 0 0 0 0 ...
## $ HasSocialNet : int 0 1 0 0 0 1 1 0 1 0 ...
## $ HasSubmitButton : int 0 1 0 1 0 0 1 0 0 0 ...
## $ HasHiddenFields : int 0 1 0 0 0 1 1 0 0 0 ...
## $ HasPasswordField : int 0 0 0 1 0 0 1 0 0 0 ...
## $ Bank : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Pay : int 0 0 0 0 0 1 1 0 0 0 ...
## $ Crypto : int 0 0 0 0 0 0 0 0 0 0 ...
## $ HasCopyrightInfo : int 0 1 0 0 1 0 1 1 1 0 ...
## $ NoOfImage : int 0 85 0 1 12 8 21 11 65 0 ...
## $ NoOfCSS : int 0 47 0 1 3 11 9 10 3 0 ...
## $ NoOfJS : int 1 57 0 0 3 11 31 6 5 0 ...
## $ NoOfSelfRef : int 0 193 0 0 4 19 40 0 70 0 ...
## $ NoOfEmptyRef : int 0 8 0 2 0 3 3 0 1 0 ...
## $ NoOfExternalRef : int 1 245 1 0 14 4 54 2 230 1 ...
## $ label : int 0 1 0 0 1 1 1 1 1 0 ...
## NULL
##Persistent Homology of PP_URL_Culled dataset
# Pass the 1000-row PP_URL_Culled sample to calculate_homology() and keep the
# result for the two plots below.
phom_PP_URL_Culled_1000_data_df <-
  PP_URL_Culled_1000 |>
  calculate_homology()
# Barcode plot of the homology result.
phom_PP_URL_Culled_1000_data_df |>
  plot_barcode()

# Persistence diagram of the same homology result.
phom_PP_URL_Culled_1000_data_df |>
  plot_persist()

##One hot encoding for BitcoinHeistData.2.culled2 dataset
#Bitcoin data culled
# Drop the first column of the raw Bitcoin data.
BitcoinHeistData.2.culled <- BitcoinHeistData.2[, -1]
# label2 holds the first five characters of label.
# NOTE(review): str_sub() is presumably stringr's; no library(stringr) call is
# visible in this part of the file -- confirm it is attached earlier.
BitcoinHeistData.2.culled$label2 <- str_sub(BitcoinHeistData.2.culled$label, 1, 5)
# Drop column 9 by position; presumably this is the full-length label column,
# since only the label2 indicators appear after encoding -- verify.
BitcoinHeistData.2.culled2 <- BitcoinHeistData.2.culled[, -9]
library(caret)
# Build the encoder. The " ~ ." formula hands every remaining column to
# dummyVars().
dummy_BitcoinHeistData.2.culled2 <- dummyVars(
  " ~ .",
  data = BitcoinHeistData.2.culled2
)
# Apply the encoder to the same data and store the prediction as a data frame.
BitcoinHeistData.2.culled2_one_hot_df <- data.frame(
  predict(dummy_BitcoinHeistData.2.culled2, newdata = BitcoinHeistData.2.culled2)
)
# Print the structure of the encoded data frame. str() returns NULL invisibly,
# so it is called directly rather than through head(), which only printed a
# stray "NULL".
str(BitcoinHeistData.2.culled2_one_hot_df)
## 'data.frame': 2916697 obs. of 12 variables:
## $ year : num 2017 2016 2016 2016 2016 ...
## $ day : num 11 132 246 322 238 96 225 324 298 62 ...
## $ length : num 18 44 0 72 144 144 142 78 144 112 ...
## $ weight : num 0.008333 0.000244 1 0.003906 0.072848 ...
## $ count : num 1 1 1 1 456 ...
## $ looped : num 0 0 0 0 0 0 0 0 0 0 ...
## $ neighbors : num 2 1 2 2 1 1 2 2 2 1 ...
## $ income : num 1.00e+08 1.00e+08 2.00e+08 7.12e+07 2.00e+08 ...
## $ label2montr: num 0 0 0 0 0 0 0 0 0 0 ...
## $ label2padua: num 0 0 0 0 0 0 0 0 0 0 ...
## $ label2princ: num 1 1 1 1 1 1 1 1 1 1 ...
## $ label2white: num 0 0 0 0 0 0 0 0 0 0 ...
## NULL
# Keep 1000 distinct rows of the encoded Bitcoin data, chosen at random.
# The row indices must be drawn from the row count of the table being
# subset. Drawing them from nrow(adult.one_hot_df) limits the sample to the
# leading rows of the Bitcoin table whenever that other table is shorter, and
# would produce NA rows if it were longer.
BitcoinHeistData.2.culled2_one_hot_1000_df <- BitcoinHeistData.2.culled2_one_hot_df[
  sample(nrow(BitcoinHeistData.2.culled2_one_hot_df), size = 1000, replace = FALSE),
]
# Print the structure of the sample. str() returns NULL invisibly, so it is
# called directly rather than through head(), which only printed a stray
# "NULL".
str(BitcoinHeistData.2.culled2_one_hot_1000_df)
## 'data.frame': 1000 obs. of 12 variables:
## $ year : num 2014 2013 2015 2014 2016 ...
## $ day : num 120 263 202 80 155 85 210 331 166 273 ...
## $ length : num 144 144 16 144 12 0 0 2 18 4 ...
## $ weight : num 0.000448 1.927315 0.024306 1.056854 0.0625 ...
## $ count : num 1098 1796 6 1609 1 ...
## $ looped : num 0 46 0 2 0 0 0 0 0 0 ...
## $ neighbors : num 1 4 1 2 2 2 2 2 2 2 ...
## $ income : num 2.38e+08 2.05e+09 2.64e+08 2.52e+08 1.20e+08 ...
## $ label2montr: num 0 1 0 0 1 0 0 0 1 1 ...
## $ label2padua: num 1 0 1 1 0 0 0 0 0 0 ...
## $ label2princ: num 0 0 0 0 0 1 1 1 0 0 ...
## $ label2white: num 0 0 0 0 0 0 0 0 0 0 ...
## NULL
##Persistent Homology of BitcoinHeistData.2.culled2 dataset
# Pass the 1000-row encoded Bitcoin sample to calculate_homology() and keep
# the result for the two plots below.
phom_BitcoinHeistData.2.culled2_one_hot_1000_df <-
  BitcoinHeistData.2.culled2_one_hot_1000_df |>
  calculate_homology()
# Barcode plot of the homology result.
phom_BitcoinHeistData.2.culled2_one_hot_1000_df |>
  plot_barcode()

# Persistence diagram of the same homology result.
phom_BitcoinHeistData.2.culled2_one_hot_1000_df |>
  plot_persist()

#Diabetic data culled
library(caret)
# Remove the columns at positions 1, 2, 13 and 14 from diabetic_data.
diabetic_data_culled <- diabetic_data[, -c(1, 2, 13, 14)]
##One hot encoding for diabetic_data_culled dataset
# Build the encoder. The " ~ ." formula hands every remaining column to
# dummyVars().
dummy_diabetic_data_culled <- dummyVars(" ~ .", data = diabetic_data_culled)
# NOTE(review): every step from here to the end of the diabetic section is
# commented out, so no encoded frame, sample or homology is produced for this
# dataset; the reason is not visible here. The disabled sampling line is
# written against the diabetic frame with balanced parentheses so that it
# runs if the section is re-enabled.
#perform one-hot encoding on data frame
#diabetic_data_culled_one_hot_df <- data.frame(predict(dummy_diabetic_data_culled, newdata=diabetic_data_culled))
##str final data frame
#str(diabetic_data_culled_one_hot_df)
#diabetic_data_culled_one_hot_1000_df <- diabetic_data_culled_one_hot_df[sample(nrow(diabetic_data_culled_one_hot_df), size = 1000, replace = FALSE), ]
##Persistent Homology of Diabetic dataset
# calculate persistent homology for diabetic_data_culled Dataset
#phom_diabetic_data_culled_one_hot_1000_df <- calculate_homology(diabetic_data_culled_one_hot_1000_df)
# plot barcode for diabetic_data_culled_one_hot_1000 Dataset
#plot_barcode(phom_diabetic_data_culled_one_hot_1000_df)
# plot persistent diagram of diabetic_data_culled_one_hot_1000 Dataset
#plot_persist(phom_diabetic_data_culled_one_hot_1000_df)
#Poker Hand Dataset
# Keep 1000 distinct rows of poker.hand.training.true, chosen at random
# (sample.int() draws without replacement by default).
poker_hand_training_true_1000 <- poker.hand.training.true[
  sample.int(nrow(poker.hand.training.true), size = 1000),
]
##Persistent Homology of Poker Hand Training dataset
# Pass the 1000-row sample to calculate_homology() and keep the result for
# the two plots below.
phom_poker_hand_training_true_1000 <-
  poker_hand_training_true_1000 |>
  calculate_homology()
# Barcode plot of the homology result.
phom_poker_hand_training_true_1000 |>
  plot_barcode()

# Persistence diagram of the same homology result.
phom_poker_hand_training_true_1000 |>
  plot_persist()

#Internet Firewall Dataset
# NOTE(review): `log2` is presumably a data frame loaded earlier under that
# name (it shadows base R's log2() function). If no such object exists, this
# line copies the function instead and the encoding below fails -- confirm.
IntFirewallData <- log2
##One hot encoding for IntFirewallData dataset
library(caret)
# Build the encoder. The " ~ ." formula hands every column of IntFirewallData
# to dummyVars().
dummy_IntFirewallData <- dummyVars(" ~ .", data = IntFirewallData)
# Apply the encoder to the same data and store the prediction as a data frame.
IntFirewallData_one_hot_df <- data.frame(
  predict(dummy_IntFirewallData, newdata = IntFirewallData)
)
# Print the structure of the encoded data frame. str() returns NULL invisibly,
# so it is called directly rather than through head(), which only printed a
# stray "NULL".
str(IntFirewallData_one_hot_df)
## 'data.frame': 65532 obs. of 15 variables:
## $ Source.Port : num 57222 56258 6881 50553 50002 ...
## $ Destination.Port : num 53 3389 50321 3389 443 ...
## $ NAT.Source.Port : num 54587 56258 43265 50553 45848 ...
## $ NAT.Destination.Port: num 53 3389 50321 3389 443 ...
## $ Actionallow : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Actiondeny : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Actiondrop : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Actionreset.both : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Bytes : num 177 4768 238 3327 25358 ...
## $ Bytes.Sent : num 94 1600 118 1438 6778 ...
## $ Bytes.Received : num 83 3168 120 1889 18580 ...
## $ Packets : num 2 19 2 15 31 21 6 23 1 31 ...
## $ Elapsed.Time..sec. : num 30 17 1199 17 16 ...
## $ pkts_sent : num 1 10 1 8 13 12 3 12 1 15 ...
## $ pkts_received : num 1 9 1 7 18 9 3 11 0 16 ...
## NULL
# Keep 1000 distinct rows of the encoded firewall data, chosen at random
# (sample.int() draws without replacement by default).
IntFirewallData_one_hot_1000_df <- IntFirewallData_one_hot_df[
  sample.int(nrow(IntFirewallData_one_hot_df), size = 1000),
]
# Auto-print the whole sample; this writes every one of the 1000 rows to the
# output.
IntFirewallData_one_hot_1000_df
## Source.Port Destination.Port NAT.Source.Port NAT.Destination.Port
## 4519 55346 443 44978 443
## 13921 54883 51505 0 0
## 21316 62446 445 0 0
## 61397 53535 1393 14227 1393
## 59091 52802 56205 0 0
## 61449 51584 443 13301 443
## 21342 62243 445 0 0
## 10124 38658 443 22719 443
## 62762 57470 51413 17602 51413
## 42177 60052 37965 0 0
## 21317 62442 445 0 0
## 2656 55055 445 0 0
## 35422 55075 53 29760 53
## 4781 57067 53 57189 53
## 55032 55232 53 8300 53
## 2793 28579 64147 0 0
## 63544 62569 44847 0 0
## 1966 41614 5222 29157 5222
## 58168 1925 53 9734 53
## 37824 60207 80 10851 80
## 48250 50316 80 40877 80
## 41867 49858 443 3076 443
## 32808 53455 53 0 0
## 60722 51653 445 0 0
## 12209 49417 53 14090 53
## 58810 60354 443 37287 443
## 43979 27443 23 0 0
## 36938 49152 53 17272 53
## 14813 63932 445 0 0
## 59979 58046 53 58046 53
## 55479 50440 53 39485 53
## 41314 57470 6889 19938 6889
## 22837 20962 22114 0 0
## 56850 34114 443 15852 443
## 7337 58638 5900 0 0
## 23632 61750 445 0 0
## 16543 47034 443 4001 443
## 61906 55892 445 0 0
## 64959 50010 445 0 0
## 32094 64173 445 0 0
## 38121 57685 445 0 0
## 48018 27005 27015 19164 27015
## 32945 59119 445 0 0
## 50828 56644 443 7171 443
## 15931 2991 23 0 0
## 4392 53935 42874 31408 42874
## 62548 53064 443 10926 443
## 28973 51959 42269 61621 42269
## 63723 2267 53 3037 53
## 52780 50280 57470 0 0
## 47578 55009 445 0 0
## 22171 63118 7000 63118 7000
## 50173 52763 53 0 0
## 22596 53814 6725 20985 6725
## 41014 51320 44847 0 0
## 37644 58054 445 0 0
## 44523 43725 54532 0 0
## 40851 26900 27017 5951 27017
## 2252 50453 445 0 0
## 43215 61104 445 0 0
## 6227 41207 80 55374 80
## 3920 56131 53 35553 53
## 60301 16524 22114 0 0
## 3081 55107 53 60741 53
## 8570 57220 53 51033 53
## 65470 51689 4765 24946 4765
## 28805 62824 37807 0 0
## 41110 63447 44847 0 0
## 25385 49516 445 0 0
## 44242 53290 443 5427 443
## 35075 7226 50584 0 0
## 32480 49235 53 22418 53
## 54476 55521 53 35090 53
## 51675 52887 37807 0 0
## 44625 31574 53 31574 53
## 45247 45339 443 35795 443
## 39488 62243 445 0 0
## 8487 51414 50584 0 0
## 5842 64670 53 60367 53
## 23741 58344 53 58344 53
## 59676 40232 53 19769 53
## 58646 48478 49903 0 0
## 28429 59886 64147 0 0
## 64070 49505 53 46670 53
## 17301 55942 53 19231 53
## 10147 41803 443 1948 443
## 34908 63399 445 0 0
## 13079 49929 26467 0 0
## 25136 65313 53 7915 53
## 53449 57028 8080 10225 8080
## 17039 57119 53 0 0
## 65011 55044 445 0 0
## 30757 56152 53 29649 53
## 32937 59141 445 0 0
## 61594 50490 443 43169 443
## 41164 49740 443 61794 443
## 37252 49502 53 5291 53
## 31019 10736 35440 0 0
## 41787 56946 445 0 0
## 30595 33525 53 23050 53
## 32431 56889 53 49457 53
## 26499 54365 64147 0 0
## 30173 43136 443 64659 443
## 55813 63204 443 63204 443
## 62233 51236 445 0 0
## 51031 58751 9571 0 0
## 23204 55529 44847 0 0
## 23677 49994 26467 0 0
## 45799 51500 443 57250 443
## 54341 24480 35440 0 0
## 26900 1059 44847 0 0
## 51885 5786 53 5786 53
## 1749 30966 44847 0 0
## 64900 57171 53 25491 53
## 58048 65532 53 19828 53
## 57868 52045 445 0 0
## 29692 55066 53 26532 53
## 23808 50544 53 44871 53
## 8669 32914 5222 62781 5222
## 55070 63782 6969 33183 6969
## 4420 55122 443 57128 443
## 4438 65220 53 17734 53
## 13451 55227 53 7352 53
## 20527 60377 443 19236 443
## 11491 51979 80 40174 80
## 2340 53928 15503 0 0
## 42925 56582 445 0 0
## 44740 42252 33303 0 0
## 28485 57987 3478 61463 3478
## 37233 64275 53 47758 53
## 57552 53555 57861 41718 57861
## 4022 58638 5900 58638 5900
## 27255 58276 43890 0 0
## 40267 57403 445 0 0
## 22745 21108 25174 0 0
## 18627 56308 53 17371 53
## 21567 27005 27015 38295 27015
## 14724 36628 80 54402 80
## 63564 50848 445 0 0
## 41864 54993 443 50582 443
## 58136 49378 53 33691 53
## 2761 54995 445 0 0
## 63672 50880 80 36202 80
## 53764 53075 445 0 0
## 21811 58891 443 55777 443
## 9941 52052 80 13756 80
## 25445 56695 53 39577 53
## 9207 15319 51505 0 0
## 22495 50293 445 0 0
## 60133 35509 3389 35509 3389
## 27618 65216 53 41251 53
## 64536 61670 57470 0 0
## 59619 36712 53 0 0
## 33984 63662 445 0 0
## 46358 42225 443 6040 443
## 36883 43604 53 41746 53
## 54999 53093 445 0 0
## 37217 49901 53 43057 53
## 29937 41996 5900 0 0
## 58791 51744 1678 56992 1678
## 9019 59182 53 44334 53
## 27556 61413 80 4426 80
## 50951 27005 27015 5485 27015
## 53670 58082 445 0 0
## 1998 18204 55160 0 0
## 5556 51988 443 46112 443
## 34498 55144 443 5039 443
## 8791 39380 49903 0 0
## 35198 37936 443 26226 443
## 49599 64288 53 63361 53
## 10546 50411 8080 50411 8080
## 7832 58638 5900 0 0
## 5724 6666 138 0 0
## 53065 59574 443 1442 443
## 55222 50878 53733 0 0
## 60563 4910 2149 0 0
## 6649 52195 50930 43224 50930
## 16201 63333 445 0 0
## 37112 54134 64147 0 0
## 39159 51900 15755 25086 15755
## 50280 51433 443 11382 443
## 21664 62164 445 0 0
## 31355 64321 53 62198 53
## 33033 59057 445 0 0
## 38411 51974 80 47213 80
## 36913 51311 22 51311 22
## 58817 30960 45673 1133 45673
## 9343 26900 27017 11369 27017
## 39610 60374 443 9623 443
## 41479 57023 445 0 0
## 36548 58059 445 0 0
## 9626 61369 64147 0 0
## 48451 51689 993 61833 993
## 56007 57259 53 43508 53
## 42634 53294 443 27871 443
## 63808 52482 443 8869 443
## 25140 55752 53 6985 53
## 49492 56793 14902 9937 14902
## 3807 64724 53 10875 53
## 13077 49841 1433 0 0
## 18195 56912 53 5094 53
## 50565 56555 443 29813 443
## 38529 57083 53 19689 53
## 508 52266 31225 1238 31225
## 16306 64950 53 51754 53
## 10393 65006 445 0 0
## 30229 64385 53 21883 53
## 34807 60476 80 59400 80
## 27594 57470 64430 53438 64430
## 21686 22924 25174 0 0
## 63632 45941 23386 0 0
## 31394 52769 443 2032 443
## 51622 27005 27015 45197 27015
## 23887 34788 443 32056 443
## 61223 49435 443 3627 443
## 27278 60625 445 0 0
## 44310 56194 445 0 0
## 15214 63823 445 0 0
## 49693 57310 443 42139 443
## 9004 32911 443 37831 443
## 63308 63732 40642 31051 40642
## 20938 49776 53 56083 53
## 14241 9234 80 38036 80
## 39693 52312 443 50036 443
## 56361 43033 443 43033 443
## 42726 10038 443 58313 443
## 6999 58638 5900 0 0
## 56448 52788 445 0 0
## 51193 57160 53 20461 53
## 10311 31126 53 31126 53
## 53946 53428 445 0 0
## 63100 11136 15503 0 0
## 32192 51107 80 61498 80
## 17834 55534 80 1439 80
## 11840 6881 50239 58874 50239
## 18396 64146 53 63495 53
## 36974 53720 50795 49301 50795
## 51924 27005 27018 55202 27018
## 1154 62069 443 28586 443
## 37419 58497 443 60144 443
## 12061 44925 443 50958 443
## 47095 60039 80 53912 80
## 20634 55384 53 49003 53
## 11071 55439 37965 0 0
## 34051 52793 443 12880 443
## 41868 54994 443 29516 443
## 20200 56226 53 49033 53
## 43393 35120 26467 0 0
## 50015 7104 23 0 0
## 19855 62548 445 0 0
## 19289 51979 25174 0 0
## 59568 50084 26672 0 0
## 20456 55435 25174 0 0
## 51753 50878 53733 0 0
## 56755 63305 62413 0 0
## 1396 50764 445 0 0
## 24703 62478 443 29633 443
## 47162 28176 34021 0 0
## 41100 56825 445 0 0
## 43756 56838 35124 44520 35124
## 47463 59323 53 1369 53
## 36910 56939 53 27310 53
## 21523 60627 443 41376 443
## 51106 61838 37965 0 0
## 36874 50005 53 1672 53
## 24069 64907 53 25739 53
## 48118 59845 445 0 0
## 35405 42278 443 42278 443
## 4221 49945 445 0 0
## 52109 29424 37807 0 0
## 15172 22916 25174 0 0
## 13853 64835 53 41380 53
## 45875 60893 443 5461 443
## 13064 58646 44847 0 0
## 13594 64101 445 0 0
## 18344 56752 53 41172 53
## 38000 37965 23215 60797 23215
## 55635 52251 443 26101 443
## 4484 10013 40024 56811 40024
## 3323 55879 53 40028 53
## 17453 53022 25174 0 0
## 56203 62767 57470 0 0
## 45239 56834 46513 28288 46513
## 58768 49418 27016 27118 27016
## 33224 37653 5223 25600 5223
## 48820 26502 64147 0 0
## 49940 63818 19291 56455 19291
## 49139 60327 80 34206 80
## 28207 60319 445 0 0
## 23732 50269 443 37644 443
## 20414 55384 53 37937 53
## 2857 56794 443 16368 443
## 34688 62343 443 28448 443
## 47876 56343 443 34805 443
## 44706 65064 30188 0 0
## 7466 58638 5900 0 0
## 299 52270 3708 62556 3708
## 13880 48817 45919 48817 45919
## 41753 48125 7548 0 0
## 8882 53614 445 0 0
## 19941 54969 53 12796 53
## 32088 22125 23 0 0
## 6832 49384 445 0 0
## 42415 56456 445 0 0
## 65526 65323 53 33275 53
## 2895 49929 53 57396 53
## 58580 49996 53 62453 53
## 25475 49390 53 24759 53
## 65009 37837 37965 0 0
## 10315 65343 53 9995 53
## 53370 55028 443 55949 443
## 35060 62293 53 18482 53
## 60614 51698 445 0 0
## 50106 53178 26467 0 0
## 8385 65446 445 0 0
## 10103 64733 35440 0 0
## 65059 43611 61903 0 0
## 42426 45654 67 0 0
## 26704 50606 443 59722 443
## 13729 59485 80 39693 80
## 5790 56240 80 56240 80
## 26589 60816 445 0 0
## 16578 57442 2258 57442 2258
## 4490 55939 53 57194 53
## 21604 50949 80 11302 80
## 18982 64595 64147 0 0
## 13694 64229 445 0 0
## 50725 59029 445 0 0
## 39406 443 36576 0 0
## 30599 64421 53 64542 53
## 43405 61016 445 0 0
## 39561 62207 445 0 0
## 9926 54553 53 37407 53
## 9780 65000 445 0 0
## 52308 38630 443 20829 443
## 9877 57149 53 34608 53
## 26063 50629 443 47938 443
## 53399 65392 443 55705 443
## 9951 45057 443 6642 443
## 56439 1027 11392 0 0
## 62318 53128 443 36411 443
## 50879 56966 80 60389 80
## 36016 65264 53 8238 53
## 22278 50211 1688 3994 1688
## 48370 42557 443 48761 443
## 57086 59418 443 19462 443
## 22735 55435 25174 0 0
## 6738 54022 445 0 0
## 20708 64205 53 39534 53
## 46397 49479 56205 0 0
## 22130 61017 64147 0 0
## 40081 64537 53 43379 53
## 6501 63107 443 39326 443
## 18037 63032 22114 0 0
## 18950 50207 53 12070 53
## 43834 55762 53 54612 53
## 60049 55946 443 34803 443
## 60588 51286 445 0 0
## 62148 61705 80 47319 80
## 3520 64568 53 24682 53
## 21810 36889 80 51153 80
## 44472 35078 443 8054 443
## 37859 56496 80 21274 80
## 29802 55019 53 59602 53
## 33632 60438 17148 35885 17148
## 29501 63930 44330 31365 44330
## 46373 64658 53 8151 53
## 28722 59483 23 0 0
## 56963 65131 53 1840 53
## 35515 64868 53 56069 53
## 7642 58638 5900 0 0
## 9193 65321 445 0 0
## 50382 54586 445 0 0
## 27418 53713 443 10339 443
## 52448 63998 21247 0 0
## 23072 64711 53 30437 53
## 30147 43461 35440 0 0
## 44119 60875 443 10095 443
## 64169 56942 443 61847 443
## 12619 56028 53 49252 53
## 28265 63313 443 9713 443
## 6073 49598 445 0 0
## 65152 18889 50584 0 0
## 52381 34897 50584 0 0
## 18375 64106 53 27158 53
## 24541 58862 443 26650 443
## 34697 39098 51505 0 0
## 54059 55540 443 32830 443
## 59260 50979 80 12190 80
## 27153 53787 1 62839 1
## 61217 64966 53 51667 53
## 17470 59668 50584 0 0
## 41021 57148 445 0 0
## 62674 53202 443 64942 443
## 47772 49242 53 1581 53
## 6539 46014 53 46014 53
## 39436 62278 445 0 0
## 49383 54908 445 0 0
## 30540 59639 445 0 0
## 62053 56176 53 42328 53
## 17416 63062 445 0 0
## 7008 58638 5900 0 0
## 57263 26324 30188 0 0
## 37999 50969 443 44036 443
## 43125 50485 80 52438 80
## 10021 65093 445 0 0
## 5759 52077 80 35958 80
## 3543 63464 443 19526 443
## 42108 38852 37965 0 0
## 10333 44894 443 39327 443
## 22250 61933 445 0 0
## 63362 53715 443 45128 443
## 42107 56865 445 0 0
## 37885 63934 53 32673 53
## 48353 49843 53 21434 53
## 56872 55317 443 24510 443
## 45439 27005 32028 64745 32028
## 9708 22542 30188 0 0
## 52345 26791 443 23053 443
## 17467 42238 25174 0 0
## 53809 51776 14645 62934 14645
## 49239 30179 57470 0 0
## 43093 51870 40490 10666 40490
## 32259 8592 35993 0 0
## 12191 49261 53 20112 53
## 15647 61572 53 61572 53
## 4951 55653 53 34737 53
## 23968 49885 445 0 0
## 19777 49765 53 55763 53
## 59511 56620 445 0 0
## 2995 52205 24416 50314 24416
## 5866 34980 443 26995 443
## 28426 51453 64147 0 0
## 3616 50131 445 0 0
## 13391 49667 80 7772 80
## 39370 49531 53 34898 53
## 55447 65396 53 55502 53
## 24869 3478 61463 0 0
## 44665 49855 53 11171 53
## 63753 53392 53 53392 53
## 64667 19099 56205 0 0
## 16094 54516 53 54516 53
## 42002 53719 443 48486 443
## 33910 59099 445 0 0
## 17145 49686 53 47017 53
## 46779 50330 80 61342 80
## 10282 55543 3389 55543 3389
## 67 52193 443 50924 443
## 35695 43578 15503 0 0
## 37981 37965 50321 10306 50321
## 13702 52450 445 0 0
## 1194 56092 53 54654 53
## 61358 51440 445 0 0
## 35140 55457 53 12461 53
## 10447 53193 445 0 0
## 50583 27005 27015 62315 27015
## 17473 51433 445 0 0
## 55426 49229 53 54866 53
## 21111 50857 3389 50857 3389
## 55223 57060 50584 0 0
## 58596 57268 443 29567 443
## 36957 55000 53 36007 53
## 51457 58765 445 0 0
## 2957 56039 50584 0 0
## 3828 58638 5900 58638 5900
## 23251 50089 445 0 0
## 62058 40576 443 40576 443
## 12622 49953 53 38496 53
## 45778 56694 53 57041 53
## 44347 60778 445 0 0
## 40350 48020 7484 0 0
## 4716 49841 445 0 0
## 18161 56603 53 59539 53
## 40307 47910 7464 0 0
## 29537 54464 56205 0 0
## 26918 49170 445 0 0
## 15757 49694 5900 0 0
## 63252 30960 45676 24387 45676
## 19603 56328 36237 0 0
## 41439 35558 15503 0 0
## 12858 54117 443 30153 443
## 38480 65497 53 19517 53
## 65135 61482 443 17696 443
## 9925 56994 53 35888 53
## 8855 40845 64147 0 0
## 12669 49418 22114 0 0
## 44887 55988 445 0 0
## 58489 63128 443 30036 443
## 2916 55725 14704 2998 14704
## 58527 63768 55755 28718 55755
## 32516 56686 443 39990 443
## 44165 49396 53 21722 53
## 28845 60118 445 0 0
## 24255 55057 443 7517 443
## 49754 59389 445 0 0
## 10975 64808 445 0 0
## 43106 49795 53 1877 53
## 126 53987 16571 20059 16571
## 30358 55391 53 35560 53
## 39835 62150 445 0 0
## 11092 62580 23 0 0
## 58970 49418 27016 62306 27016
## 38068 51401 6881 0 0
## 56544 64831 53 56461 53
## 49764 43584 24748 0 0
## 54740 52140 80 9768 80
## 63352 60156 443 49389 443
## 7364 58638 5900 0 0
## 44939 56593 443 58940 443
## 10246 49240 53 19807 53
## 5061 64066 80 28023 80
## 59974 62462 53 62462 53
## 44699 56062 445 0 0
## 31081 64091 53 52948 53
## 48377 65487 53 58054 53
## 51888 53770 64147 0 0
## 62738 53029 443 57596 443
## 62006 59779 443 65358 443
## 21966 53202 443 56977 443
## 42931 58131 30228 0 0
## 25772 64316 53 5577 53
## 49250 53629 51413 42166 51413
## 29343 15978 50584 0 0
## 46830 56553 53 46585 53
## 64366 55887 53 55333 53
## 32378 49692 53 65365 53
## 30177 37578 443 36969 443
## 63199 50522 445 0 0
## 4908 56330 53 48374 53
## 50498 30960 45701 51481 45701
## 8854 60479 25174 0 0
## 63193 50524 445 0 0
## 64109 35101 443 31000 443
## 33814 14036 48817 0 0
## 56606 3303 80 51748 80
## 32064 26606 40108 0 0
## 53274 52643 36237 0 0
## 58640 50181 64147 0 0
## 13415 49576 80 3769 80
## 61401 56759 55201 40009 55201
## 40211 58672 43890 0 0
## 33513 16647 53 16647 53
## 22778 61820 445 0 0
## 3392 60473 22170 57823 22170
## 20397 57035 53 11215 53
## 57570 49367 53 7025 53
## 32793 56644 80 27121 80
## 58987 52751 443 8615 443
## 37694 49543 80 13855 80
## 17478 63587 22114 0 0
## 37416 51967 443 12004 443
## 46727 51510 7000 51510 7000
## 59235 61000 15503 0 0
## 46854 55497 53 48989 53
## 16105 32866 443 34870 443
## 47560 55027 445 0 0
## 64789 50398 80 47950 80
## 15460 18331 53 15455 53
## 56809 19610 9571 0 0
## 16910 40312 55442 0 0
## 53389 56771 53 26360 53
## 43035 56657 443 37201 443
## 60235 443 12105 0 0
## 41093 50338 443 61638 443
## 56126 39921 80 15019 80
## 62730 58981 443 5501 443
## 45910 63837 14023 10553 14023
## 41809 52724 64147 0 0
## 1267 55066 443 25317 443
## 19261 51088 445 0 0
## 2440 64709 53 54232 53
## 648 61183 25174 0 0
## 53215 58234 445 0 0
## 8936 49275 7000 49275 7000
## 15448 50597 80 50597 80
## 15150 64077 25174 0 0
## 37710 52501 53 50653 53
## 25677 55684 53 56434 53
## 43700 53376 443 49944 443
## 12795 9302 443 20327 443
## 59550 56600 445 0 0
## 25684 64894 53 1246 53
## 41534 61619 445 0 0
## 59342 56468 80 52689 80
## 47800 64074 53 30034 53
## 45862 53478 443 15802 443
## 38812 49319 53 1935 53
## 34105 63914 32805 62469 32805
## 40698 49453 443 59315 443
## 47109 64381 53 47350 53
## 6070 443 50238 0 0
## 4936 49869 53 64308 53
## 48585 56455 53 54287 53
## 16349 58944 443 12374 443
## 34766 61802 443 61802 443
## 30601 64081 443 44288 443
## 25224 65398 53 64225 53
## 40039 48706 23393 0 0
## 21238 50265 23 0 0
## 38457 55405 53 55361 53
## 31129 49272 53 25138 53
## 45840 56720 53 11601 53
## 39498 57348 445 0 0
## 937 52194 443 33437 443
## 41842 61524 445 0 0
## 10481 50052 65010 0 0
## 9401 50488 22114 0 0
## 38591 57601 445 0 0
## 32514 55184 53 41873 53
## 36438 49713 53 37729 53
## 46934 56308 53 53406 53
## 654 60128 22114 0 0
## 42409 56769 445 0 0
## 45238 58649 443 35795 443
## 53974 57996 445 0 0
## 20276 62490 445 0 0
## 28562 35617 443 43217 443
## 60239 6676 63836 0 0
## 25939 443 11817 0 0
## 53410 26900 27017 64029 27017
## 4500 55087 53 19901 53
## 15016 53373 64147 0 0
## 44394 53003 11392 0 0
## 22466 50304 445 0 0
## 45696 42252 33306 0 0
## 37977 36126 53 44357 53
## 4418 36897 443 23482 443
## 14324 63810 27064 25460 27064
## 26789 55817 53 30944 53
## 58895 63770 24466 46194 24466
## 19359 58911 443 55607 443
## 55053 64791 443 54264 443
## 23932 3928 64147 0 0
## 18862 56202 53 15464 53
## 33255 49935 53 2977 53
## 4635 59977 30188 0 0
## 57940 39611 443 31755 443
## 40995 65317 51221 0 0
## 17949 47910 7454 0 0
## 51586 39423 80 5862 80
## 50158 3181 53 3181 53
## 42441 61338 445 0 0
## 23657 56770 26467 0 0
## 47732 55062 53 13920 53
## 30165 55727 53 48303 53
## 39466 65494 53 0 0
## 129 64095 1976 59706 1976
## 9309 55034 443 64856 443
## 19029 52503 50584 0 0
## 62043 57235 53 3428 53
## 19374 50377 53 41134 53
## 48164 54636 26467 0 0
## 38602 57573 445 0 0
## 45567 55811 445 0 0
## 25998 49208 6881 3930 6881
## 8097 1230 3389 1230 3389
## 56250 14082 50584 0 0
## 21983 42732 53 10027 53
## 47031 55454 445 0 0
## 22223 50386 445 0 0
## 12560 60481 40075 20355 40075
## 3821 58638 5900 58638 5900
## 46484 55548 445 0 0
## 16385 37565 443 55641 443
## 24859 49624 445 0 0
## 35478 64813 53 46374 53
## 24834 61399 445 0 0
## 44816 50949 443 50949 443
## 50037 56173 37807 0 0
## 57070 35353 443 65162 443
## 31616 59694 50584 0 0
## 10860 53104 445 0 0
## 1625 64171 53 47261 53
## 14913 62394 80 62394 80
## 12625 62763 443 43714 443
## 63031 49249 443 43033 443
## 35057 38834 53 64506 53
## 6705 53759 42873 0 0
## 62480 49673 443 32250 443
## 59860 56547 445 0 0
## 42139 56519 445 0 0
## 43081 49935 80 29909 80
## 59150 52139 445 0 0
## 36476 37965 32405 17637 32405
## 43734 27005 27017 53801 27017
## 6615 52093 80 7625 80
## 10184 45364 443 12194 443
## 9778 65001 445 0 0
## 20712 60226 443 32684 443
## 13640 57300 53 43157 53
## 59389 64494 443 46008 443
## 5523 50547 53 5293 53
## 29064 37737 443 61948 443
## 27924 54744 443 54744 443
## 14337 57073 443 48824 443
## 48881 49905 53 29720 53
## 50532 11133 51505 0 0
## 55425 65357 53 38110 53
## 11647 52140 53155 1111 53155
## 2860 43212 443 25002 443
## 28080 60349 445 0 0
## 29785 53301 11000 8510 11000
## 39675 63457 443 6711 443
## 12618 50293 53 25447 53
## 11206 56661 53 16607 53
## 50716 59034 445 0 0
## 24100 62074 443 62074 443
## 7677 58638 5900 0 0
## 17044 54959 53 17105 53
## 41904 53147 37965 0 0
## 60500 53540 34134 25153 34134
## 23255 61851 445 0 0
## 15255 63639 445 0 0
## 38270 62512 445 0 0
## 60237 62787 51221 0 0
## 40274 57396 445 0 0
## 34028 65507 53 47508 53
## 24954 57565 443 12958 443
## 24440 49726 445 0 0
## 4543 56317 53 26526 53
## 8889 51533 44847 0 0
## 26404 49348 445 0 0
## 1332 55392 445 0 0
## 64088 53375 443 59892 443
## 22392 2422 62507 0 0
## 64184 50196 445 0 0
## 3122 64797 53 38007 53
## 17259 49255 53 62654 53
## 35015 50561 80 19741 80
## 34728 58826 445 0 0
## 29325 10480 26467 0 0
## 56196 52480 445 0 0
## 9522 55199 53 60419 53
## 8004 52518 35440 0 0
## 41413 61661 445 0 0
## 19198 50981 443 28253 443
## 59999 49285 53 48057 53
## 47586 61902 22114 0 0
## 40981 57176 445 0 0
## 24452 61273 445 0 0
## 6731 53928 15503 0 0
## 13483 50023 53 26218 53
## 60650 59411 37965 0 0
## 30117 54084 443 63980 443
## 21332 62247 445 0 0
## 37240 60720 23718 0 0
## 31307 59784 443 16677 443
## 19466 55714 53 2036 53
## 16111 52138 443 52138 443
## 41387 25348 23 0 0
## 18679 46776 443 41505 443
## 50369 61223 37965 0 0
## 41349 40474 23117 0 0
## 29521 64773 445 0 0
## 13011 64419 445 0 0
## 7192 58638 5900 0 0
## 45024 56345 53 18764 53
## 65239 14641 443 14641 443
## 25580 49464 445 0 0
## 37836 57418 53 56760 53
## 31840 58741 443 39394 443
## 22142 33886 52255 0 0
## 62123 55292 53 33154 53
## 12840 8916 443 25420 443
## 25670 35889 443 19640 443
## 31825 57371 53 14687 53
## 25728 65398 53 27490 53
## 65151 49908 445 0 0
## 48911 55773 53 61209 53
## 57171 49983 53 2734 53
## 4733 12290 22114 0 0
## 55275 53031 445 0 0
## 49213 47700 51505 0 0
## 10523 53157 445 0 0
## 20474 57737 53 57737 53
## 23319 32959 443 7767 443
## 37131 62767 445 0 0
## 44281 50584 51651 64737 51651
## 34899 58807 445 0 0
## 25149 56500 53 23471 53
## 10468 443 14591 0 0
## 968 54539 26467 0 0
## 59957 51881 445 0 0
## 26999 44628 80 44628 80
## 4953 49629 53 5781 53
## 16950 13235 44847 0 0
## 43154 57878 55108 0 0
## 10027 65078 445 0 0
## 45632 34873 80 34873 80
## 38163 993 18732 0 0
## 38768 62393 445 0 0
## 41171 49809 443 59332 443
## 6703 49440 445 0 0
## 39721 49632 53 26153 53
## 61720 52788 64147 0 0
## 29920 59840 445 0 0
## 38698 53897 443 3893 443
## 32774 50635 443 12337 443
## 6876 57131 31061 0 0
## 17938 45424 23 0 0
## 29961 62757 44847 0 0
## 37801 50363 53 11154 53
## 64428 50388 53 36843 53
## 46086 55264 53 35896 53
## 23041 55310 53 40255 53
## 42350 47969 53 59484 53
## 348 52041 80 18599 80
## 40766 33922 443 8890 443
## 31159 33545 443 31253 443
## 18513 55711 26467 0 0
## 14618 65318 53 63246 53
## 39559 53147 37965 0 0
## 5038 56951 53 9479 53
## 48438 49475 53 56421 53
## 1831 443 44683 0 0
## 35323 56794 46824 0 0
## 37857 55226 443 46963 443
## 7285 58638 5900 0 0
## 16144 63385 445 0 0
## 47434 56090 53 62587 53
## 9231 53524 445 0 0
## 45212 60970 20541 0 0
## 18148 64965 27049 54387 27049
## 43671 54028 37965 0 0
## 5536 56005 53 33914 53
## 62174 50867 443 26907 443
## 38306 55222 443 38278 443
## 40841 54000 53 38105 53
## 60676 51661 445 0 0
## 10502 64941 445 0 0
## 37436 59966 443 53241 443
## 4795 50336 53 11367 53
## 6256 36665 443 51014 443
## 37313 65395 53 34335 53
## 50684 59050 445 0 0
## 34386 50710 80 11292 80
## 52359 29744 53 29744 53
## 9890 64932 53 8604 53
## 65303 50946 80 36199 80
## 20505 56161 53 2092 53
## 31820 53746 7546 50898 7546
## 40972 56872 445 0 0
## 37810 37009 53 17382 53
## 54407 59168 39004 0 0
## 28776 60167 445 0 0
## 30685 63913 24806 2171 24806
## 39729 49349 80 49349 80
## 64756 50931 80 59351 80
## 2159 56904 80 52022 80
## 43849 58259 53 23484 53
## 43733 27005 27019 64712 27019
## 55828 34166 443 49916 443
## 39333 57163 53 45195 53
## 42519 31995 61689 0 0
## 21231 54188 443 5906 443
## 4956 56941 53 56569 53
## 42738 56981 80 19891 80
## 12737 9307 443 6375 443
## 17356 63089 445 0 0
## 21136 64947 53 11296 53
## 56176 56962 53 40705 53
## 47194 39029 53 55192 53
## 42498 56387 445 0 0
## 47156 64582 53 25024 53
## 25443 123 123 34455 123
## 50638 55041 80 12804 80
## 36705 56275 53 15148 53
## 11590 16620 25174 0 0
## 48757 39039 443 4727 443
## 10712 64270 53 47275 53
## 29445 58779 443 44782 443
## 54553 53419 28189 0 0
## 56357 49197 53 27457 53
## 29759 52572 80 52572 80
## 40385 51956 443 47122 443
## 60274 20961 35440 0 0
## 38239 57937 445 0 0
## 44772 45682 1 0 0
## 46155 55937 443 21508 443
## 60110 43484 443 55073 443
## 44915 53256 443 59014 443
## 22420 62105 445 0 0
## 46789 56667 53 35653 53
## 62768 50699 445 0 0
## 41684 51221 62348 26234 62348
## 11564 48676 53395 0 0
## 38847 56922 53 56922 53
## 21965 1100 27540 45452 27540
## 37530 11478 51221 0 0
## 16743 57387 53 39674 53
## 41515 27452 5222 13129 5222
## 543 55563 53 42353 53
## 3988 65260 53 63253 53
## 57777 37580 443 1520 443
## 25542 43461 35440 0 0
## 58279 38645 51221 0 0
## 35155 52426 443 50838 443
## 5340 58036 443 28433 443
## 27236 62334 443 24621 443
## 44362 20198 17372 0 0
## 32797 49876 53 42623 53
## 52902 53899 443 60991 443
## 42171 2828 35253 0 0
## 3124 57063 53 54503 53
## 12614 65481 53 28484 53
## 65506 35608 443 62915 443
## 18576 13235 44847 0 0
## 53355 65384 53 58692 53
## 42124 17010 57470 0 0
## 38205 56046 53 4218 53
## 31562 3978 35440 0 0
## 56730 56307 52700 0 0
## 1160 42441 123 34753 123
## 46391 18650 23718 0 0
## 7486 58638 5900 0 0
## 54079 61464 80 54948 80
## 5158 12338 64147 0 0
## 35908 63104 445 0 0
## 37132 62766 445 0 0
## 2820 50269 445 0 0
## 13283 49874 53 61806 53
## 23624 57073 57470 0 0
## 56373 5588 5588 0 0
## 57240 53561 35885 5770 35885
## 59170 51688 445 0 0
## 21379 6036 51505 0 0
## 5372 57319 53 22543 53
## 33558 46746 443 46746 443
## 5211 49862 445 0 0
## 48266 49263 53 22784 53
## 5001 50578 80 35081 80
## 53703 53102 443 30366 443
## 2760 43155 35440 0 0
## 42501 56709 445 0 0
## 13955 64030 445 0 0
## 38479 51977 80 44699 80
## 2007 64878 26467 0 0
## 826 37419 443 55901 443
## 52165 58533 445 0 0
## 55552 57094 53 10369 53
## 51633 51221 47391 14393 47391
## 37398 49933 53 6937 53
## 45954 55725 445 0 0
## 57835 3521 6881 0 0
## 5349 53932 27258 53010 27258
## 22697 45510 443 22832 443
## 56202 52475 445 0 0
## 13350 50827 80 8180 80
## 8258 18657 53 18657 53
## 48253 58077 36653 53473 36653
## 20797 56590 25174 0 0
## 248 55658 445 0 0
## 26891 49182 445 0 0
## 63302 55535 53 24278 53
## 4057 59103 443 44683 443
## 29021 51985 80 53937 80
## 26932 48706 23393 0 0
## 22840 61779 445 0 0
## 6230 35797 53 35797 53
## 41527 61192 35440 0 0
## 19558 51221 1950 25491 1950
## 30532 59654 445 0 0
## 30065 9130 80 10198 80
## 44966 56572 443 48876 443
## 49462 53142 443 26091 443
## 31542 64307 445 0 0
## 20933 57089 443 54231 443
## 12377 42490 1433 0 0
## 16459 27995 22114 0 0
## 16917 0 0 0 0
## 48849 15930 53 15930 53
## 58835 52215 445 0 0
## 33042 59051 445 0 0
## 40843 46189 443 46189 443
## 7860 58638 5900 0 0
## 5928 56687 53 1087 53
## 61501 56236 443 26673 443
## 63685 49885 443 30250 443
## 14451 63861 445 0 0
## 8193 51118 80 35500 80
## 63312 48015 443 20608 443
## 40372 56413 51221 0 0
## 59106 56754 445 0 0
## 57863 1284 50584 0 0
## 5425 49776 445 0 0
## 18274 60422 443 51829 443
## 9576 52161 19897 63116 19897
## 45003 51836 37753 57785 37753
## 61281 62780 35440 0 0
## 55213 63547 37965 0 0
## 42753 57791 443 18201 443
## 5847 49561 445 0 0
## 10623 53132 445 0 0
## 7664 58638 5900 0 0
## 3982 64854 53 54158 53
## 62328 53115 443 63976 443
## 7794 58638 5900 0 0
## 54274 49757 53 31813 53
## 11335 9486 443 47731 443
## 22825 61790 445 0 0
## Actionallow Actiondeny Actiondrop Actionreset.both Bytes Bytes.Sent
## 4519 1 0 0 0 13181 6041
## 13921 0 1 0 0 66 66
## 21316 0 0 1 0 66 66
## 61397 1 0 0 0 70 70
## 59091 0 1 0 0 62 62
## 61449 1 0 0 0 12641 5741
## 21342 0 0 1 0 70 70
## 10124 1 0 0 0 1294 864
## 62762 1 0 0 0 330 330
## 42177 0 1 0 0 66 66
## 21317 0 0 1 0 66 66
## 2656 0 0 1 0 70 70
## 35422 1 0 0 0 178 91
## 4781 1 0 0 0 182 93
## 55032 1 0 0 0 177 94
## 2793 0 1 0 0 62 62
## 63544 0 1 0 0 66 66
## 1966 1 0 0 0 768 510
## 58168 1 0 0 0 210 78
## 37824 1 0 0 0 3596 2562
## 48250 1 0 0 0 2062 872
## 41867 1 0 0 0 16232 3319
## 32808 1 0 0 0 82 82
## 60722 0 0 1 0 66 66
## 12209 1 0 0 0 211 110
## 58810 1 0 0 0 10223 3933
## 43979 0 1 0 0 60 60
## 36938 1 0 0 0 307 105
## 14813 0 0 1 0 66 66
## 59979 1 0 0 0 231 88
## 55479 1 0 0 0 216 110
## 41314 1 0 0 0 546 166
## 22837 0 1 0 0 62 62
## 56850 1 0 0 0 6206 1695
## 7337 0 1 0 0 62 62
## 23632 0 0 1 0 66 66
## 16543 1 0 0 0 4531 3268
## 61906 0 0 1 0 70 70
## 64959 0 0 1 0 70 70
## 32094 0 0 1 0 70 70
## 38121 0 0 1 0 70 70
## 48018 1 0 0 0 871 126
## 32945 0 0 1 0 70 70
## 50828 1 0 0 0 12924 3689
## 15931 0 1 0 0 60 60
## 4392 1 0 0 0 1973 1021
## 62548 1 0 0 0 8095 2586
## 28973 1 0 0 0 70 70
## 63723 1 0 0 0 826 826
## 52780 0 1 0 0 78 78
## 47578 0 0 1 0 70 70
## 22171 1 0 0 0 3310 1535
## 50173 1 0 0 0 296 87
## 22596 1 0 0 0 70 70
## 41014 0 1 0 0 66 66
## 37644 0 0 1 0 66 66
## 44523 0 1 0 0 109 109
## 40851 1 0 0 0 422 164
## 2252 0 0 1 0 70 70
## 43215 0 0 1 0 70 70
## 6227 1 0 0 0 79441 3092
## 3920 1 0 0 0 199 94
## 60301 0 1 0 0 62 62
## 3081 1 0 0 0 184 94
## 8570 1 0 0 0 701 88
## 65470 1 0 0 0 66 66
## 28805 0 1 0 0 66 66
## 41110 0 1 0 0 62 62
## 25385 0 0 1 0 70 70
## 44242 1 0 0 0 494 288
## 35075 0 1 0 0 146 146
## 32480 1 0 0 0 172 94
## 54476 1 0 0 0 701 88
## 51675 0 1 0 0 66 66
## 44625 1 0 0 0 213 90
## 45247 1 0 0 0 2312 1382
## 39488 0 0 1 0 70 70
## 8487 0 1 0 0 62 62
## 5842 1 0 0 0 193 102
## 23741 1 0 0 0 847 110
## 59676 1 0 0 0 205 86
## 58646 0 1 0 0 62 62
## 28429 0 1 0 0 72 72
## 64070 1 0 0 0 232 110
## 17301 1 0 0 0 197 102
## 10147 1 0 0 0 11804 4009
## 34908 0 0 1 0 70 70
## 13079 0 1 0 0 66 66
## 25136 1 0 0 0 166 86
## 53449 1 0 0 0 78 78
## 17039 1 0 0 0 318 95
## 65011 0 0 1 0 70 70
## 30757 1 0 0 0 195 102
## 32937 0 0 1 0 70 70
## 61594 1 0 0 0 1353 903
## 41164 1 0 0 0 7554 2281
## 37252 1 0 0 0 193 102
## 31019 0 1 0 0 66 66
## 41787 0 0 1 0 66 66
## 30595 1 0 0 0 168 78
## 32431 1 0 0 0 211 110
## 26499 0 1 0 0 62 62
## 30173 1 0 0 0 9329 2900
## 55813 1 0 0 0 9340 4256
## 62233 0 0 1 0 66 66
## 51031 0 1 0 0 62 62
## 23204 0 1 0 0 66 66
## 23677 0 1 0 0 66 66
## 45799 1 0 0 0 7211 823
## 54341 0 1 0 0 62 62
## 26900 0 1 0 0 62 62
## 51885 1 0 0 0 213 87
## 1749 0 1 0 0 146 146
## 64900 1 0 0 0 214 102
## 58048 1 0 0 0 819 102
## 57868 0 0 1 0 70 70
## 29692 1 0 0 0 815 95
## 23808 1 0 0 0 969 86
## 8669 1 0 0 0 8702 4668
## 55070 1 0 0 0 66 66
## 4420 1 0 0 0 17936 14273
## 4438 1 0 0 0 303 94
## 13451 1 0 0 0 183 102
## 20527 1 0 0 0 10101 2782
## 11491 1 0 0 0 366 240
## 2340 0 1 0 0 62 62
## 42925 0 0 1 0 66 66
## 44740 0 1 0 0 60 60
## 28485 1 0 0 0 60483 25857
## 37233 1 0 0 0 192 90
## 57552 1 0 0 0 492 306
## 4022 1 0 0 0 66 66
## 27255 0 1 0 0 66 66
## 40267 0 0 1 0 66 66
## 22745 0 1 0 0 62 62
## 18627 1 0 0 0 814 110
## 21567 1 0 0 0 874 126
## 14724 1 0 0 0 78 78
## 63564 0 0 1 0 66 66
## 41864 1 0 0 0 38985 4250
## 58136 1 0 0 0 179 86
## 2761 0 0 1 0 70 70
## 63672 1 0 0 0 395978 13168
## 53764 0 0 1 0 70 70
## 21811 1 0 0 0 7641 5165
## 9941 1 0 0 0 2308 1541
## 25445 1 0 0 0 93 93
## 9207 0 1 0 0 62 62
## 22495 0 0 1 0 70 70
## 60133 1 0 0 0 4224 2055
## 27618 1 0 0 0 292 94
## 64536 0 1 0 0 74 74
## 59619 1 0 0 0 138 69
## 33984 0 0 1 0 70 70
## 46358 1 0 0 0 8962 1601
## 36883 1 0 0 0 168 78
## 54999 0 0 1 0 66 66
## 37217 1 0 0 0 752 93
## 29937 0 1 0 0 60 60
## 58791 1 0 0 0 66 66
## 9019 1 0 0 0 260 86
## 27556 1 0 0 0 283970 23419
## 50951 1 0 0 0 889 126
## 53670 0 0 1 0 70 70
## 1998 0 1 0 0 146 146
## 5556 1 0 0 0 10492 3167
## 34498 1 0 0 0 3522 869
## 8791 0 1 0 0 146 146
## 35198 1 0 0 0 5463 1378
## 49599 1 0 0 0 229 110
## 10546 1 0 0 0 1626 798
## 7832 0 1 0 0 62 62
## 5724 0 1 0 0 60 60
## 53065 1 0 0 0 2582500 86854
## 55222 0 1 0 0 66 66
## 60563 0 1 0 0 66 66
## 6649 1 0 0 0 70 70
## 16201 0 0 1 0 70 70
## 37112 0 1 0 0 66 66
## 39159 1 0 0 0 316 136
## 50280 1 0 0 0 194 70
## 21664 0 0 1 0 70 70
## 31355 1 0 0 0 195 102
## 33033 0 0 1 0 70 70
## 38411 1 0 0 0 38272 2881
## 36913 1 0 0 0 5624 2099
## 58817 1 0 0 0 462 462
## 9343 1 0 0 0 422 164
## 39610 1 0 0 0 130 70
## 41479 0 0 1 0 66 66
## 36548 0 0 1 0 70 70
## 9626 0 1 0 0 62 62
## 48451 1 0 0 0 11580 4032
## 56007 1 0 0 0 199 102
## 42634 1 0 0 0 7740 4267
## 63808 1 0 0 0 26154 6874
## 25140 1 0 0 0 200 102
## 49492 1 0 0 0 264 70
## 3807 1 0 0 0 190 97
## 13077 0 1 0 0 60 60
## 18195 1 0 0 0 199 102
## 50565 1 0 0 0 6308 829
## 38529 1 0 0 0 198 102
## 508 1 0 0 0 70 70
## 16306 1 0 0 0 728 102
## 10393 0 0 1 0 66 66
## 30229 1 0 0 0 307 105
## 34807 1 0 0 0 613 539
## 27594 1 0 0 0 1386 1386
## 21686 0 1 0 0 62 62
## 63632 0 1 0 0 60 60
## 31394 1 0 0 0 8083 1790
## 51622 1 0 0 0 881 126
## 23887 1 0 0 0 8230 2275
## 61223 1 0 0 0 10377 3140
## 27278 0 0 1 0 70 70
## 44310 0 0 1 0 66 66
## 15214 0 0 1 0 66 66
## 49693 1 0 0 0 134 60
## 9004 1 0 0 0 970703 43045
## 63308 1 0 0 0 66 66
## 20938 1 0 0 0 180 92
## 14241 1 0 0 0 318 192
## 39693 1 0 0 0 1574859 55851
## 56361 1 0 0 0 10239 2017
## 42726 1 0 0 0 7481 2070
## 6999 0 1 0 0 62 62
## 56448 0 0 1 0 66 66
## 51193 1 0 0 0 255 110
## 10311 1 0 0 0 240 98
## 53946 0 0 1 0 66 66
## 63100 0 1 0 0 145 145
## 32192 1 0 0 0 366 240
## 17834 1 0 0 0 3335 2314
## 11840 1 0 0 0 3514 1874
## 18396 1 0 0 0 197 102
## 36974 1 0 0 0 320 140
## 51924 1 0 0 0 881 126
## 1154 1 0 0 0 38455 5252
## 37419 1 0 0 0 1640 831
## 12061 1 0 0 0 153 93
## 47095 1 0 0 0 3431 2292
## 20634 1 0 0 0 194 102
## 11071 0 1 0 0 66 66
## 34051 1 0 0 0 130 70
## 41868 1 0 0 0 13580 3864
## 20200 1 0 0 0 200 102
## 43393 0 1 0 0 62 62
## 50015 0 1 0 0 60 60
## 19855 0 0 1 0 70 70
## 19289 0 1 0 0 66 66
## 59568 0 1 0 0 62 62
## 20456 0 1 0 0 62 62
## 51753 0 1 0 0 62 62
## 56755 0 1 0 0 66 66
## 1396 0 0 1 0 66 66
## 24703 1 0 0 0 10391 7059
## 47162 1 0 0 0 66 66
## 41100 0 0 1 0 70 70
## 43756 1 0 0 0 70 70
## 47463 1 0 0 0 260 86
## 36910 1 0 0 0 216 110
## 21523 1 0 0 0 5663 1711
## 51106 0 1 0 0 66 66
## 36874 1 0 0 0 199 102
## 24069 1 0 0 0 168 86
## 48118 0 0 1 0 70 70
## 35405 1 0 0 0 1916 1121
## 4221 0 0 1 0 70 70
## 52109 0 1 0 0 62 62
## 15172 0 1 0 0 146 146
## 13853 1 0 0 0 179 94
## 45875 1 0 0 0 46690 9390
## 13064 0 1 0 0 66 66
## 13594 0 0 1 0 70 70
## 18344 1 0 0 0 183 94
## 38000 1 0 0 0 537 158
## 55635 1 0 0 0 23328780 730794
## 4484 1 0 0 0 156 94
## 3323 1 0 0 0 184 94
## 17453 0 1 0 0 62 62
## 56203 0 1 0 0 74 74
## 45239 1 0 0 0 70 70
## 58768 1 0 0 0 231 71
## 33224 1 0 0 0 142 142
## 48820 0 1 0 0 62 62
## 49940 1 0 0 0 66 66
## 49139 1 0 0 0 2125 1159
## 28207 0 0 1 0 70 70
## 23732 1 0 0 0 8272 2455
## 20414 1 0 0 0 188 96
## 2857 1 0 0 0 8073 2053
## 34688 1 0 0 0 13709 1675
## 47876 1 0 0 0 350 210
## 44706 0 1 0 0 66 66
## 7466 0 1 0 0 62 62
## 299 1 0 0 0 316 136
## 13880 1 0 0 0 334 158
## 41753 0 1 0 0 60 60
## 8882 0 0 1 0 70 70
## 19941 1 0 0 0 199 102
## 32088 0 1 0 0 60 60
## 6832 0 0 1 0 66 66
## 42415 0 0 1 0 70 70
## 65526 1 0 0 0 356 118
## 2895 1 0 0 0 212 108
## 58580 1 0 0 0 183 94
## 25475 1 0 0 0 204 104
## 65009 0 1 0 0 62 62
## 10315 1 0 0 0 197 102
## 53370 1 0 0 0 6015 3964
## 35060 1 0 0 0 232 88
## 60614 0 0 1 0 66 66
## 50106 0 1 0 0 62 62
## 8385 0 0 1 0 70 70
## 10103 0 1 0 0 62 62
## 65059 0 1 0 0 148 148
## 42426 0 1 0 0 60 60
## 26704 1 0 0 0 1511 849
## 13729 1 0 0 0 882 527
## 5790 1 0 0 0 101524 1673
## 26589 0 0 1 0 70 70
## 16578 1 0 0 0 156 156
## 4490 1 0 0 0 193 102
## 21604 1 0 0 0 942 527
## 18982 0 1 0 0 66 66
## 13694 0 0 1 0 66 66
## 50725 0 0 1 0 70 70
## 39406 0 1 0 0 60 60
## 30599 1 0 0 0 172 88
## 43405 0 0 1 0 70 70
## 39561 0 0 1 0 70 70
## 9926 1 0 0 0 102 102
## 9780 0 0 1 0 70 70
## 52308 1 0 0 0 9277 3025
## 9877 1 0 0 0 382 94
## 26063 1 0 0 0 6553 1034
## 53399 1 0 0 0 5939 772
## 9951 1 0 0 0 7536 1915
## 56439 0 1 0 0 62 62
## 62318 1 0 0 0 72734 14614
## 50879 1 0 0 0 214 140
## 36016 1 0 0 0 256 88
## 22278 1 0 0 0 70 70
## 48370 1 0 0 0 123082 8734
## 57086 1 0 0 0 7222 1633
## 22735 0 1 0 0 146 146
## 6738 0 0 1 0 70 70
## 20708 1 0 0 0 327 88
## 46397 0 1 0 0 62 62
## 22130 0 1 0 0 66 66
## 40081 1 0 0 0 251 102
## 6501 1 0 0 0 3272 800
## 18037 0 1 0 0 66 66
## 18950 1 0 0 0 189 102
## 43834 1 0 0 0 184 102
## 60049 1 0 0 0 9274 5282
## 60588 0 0 1 0 70 70
## 62148 1 0 0 0 134 60
## 3520 1 0 0 0 388 134
## 21810 1 0 0 0 1584 1130
## 44472 1 0 0 0 234 234
## 37859 1 0 0 0 458684 19849
## 29802 1 0 0 0 299 110
## 33632 1 0 0 0 70 70
## 29501 1 0 0 0 70 70
## 46373 1 0 0 0 195 102
## 28722 0 1 0 0 60 60
## 56963 1 0 0 0 168 86
## 35515 1 0 0 0 216 102
## 7642 0 1 0 0 62 62
## 9193 0 0 1 0 66 66
## 50382 0 0 1 0 66 66
## 27418 1 0 0 0 33323 3349
## 52448 0 1 0 0 66 66
## 23072 1 0 0 0 179 86
## 30147 0 1 0 0 146 146
## 44119 1 0 0 0 1786 172
## 64169 1 0 0 0 494 420
## 12619 1 0 0 0 770 102
## 28265 1 0 0 0 1786 172
## 6073 0 0 1 0 66 66
## 65152 0 1 0 0 146 146
## 52381 0 1 0 0 62 62
## 18375 1 0 0 0 195 102
## 24541 1 0 0 0 5167 700
## 34697 0 1 0 0 146 146
## 54059 1 0 0 0 10458 5272
## 59260 1 0 0 0 576971 8128
## 27153 1 0 0 0 320 140
## 61217 1 0 0 0 458 188
## 17470 0 1 0 0 66 66
## 41021 0 0 1 0 66 66
## 62674 1 0 0 0 3655 2344
## 47772 1 0 0 0 167 86
## 6539 1 0 0 0 414 268
## 39436 0 0 1 0 70 70
## 49383 0 0 1 0 66 66
## 30540 0 0 1 0 70 70
## 62053 1 0 0 0 194 102
## 17416 0 0 1 0 70 70
## 7008 0 1 0 0 62 62
## 57263 0 1 0 0 62 62
## 37999 1 0 0 0 7221 2265
## 43125 1 0 0 0 318 192
## 10021 0 0 1 0 66 66
## 5759 1 0 0 0 366 240
## 3543 1 0 0 0 8543 1114
## 42108 0 1 0 0 62 62
## 10333 1 0 0 0 1849 1130
## 22250 0 0 1 0 70 70
## 63362 1 0 0 0 70 70
## 42107 0 0 1 0 66 66
## 37885 1 0 0 0 209 110
## 48353 1 0 0 0 193 102
## 56872 1 0 0 0 5692 1239
## 45439 1 0 0 0 900 126
## 9708 0 1 0 0 62 62
## 52345 1 0 0 0 21391 16334
## 17467 0 1 0 0 62 62
## 53809 1 0 0 0 70 70
## 49239 0 1 0 0 60 60
## 43093 1 0 0 0 70 70
## 32259 0 1 0 0 66 66
## 12191 1 0 0 0 453 186
## 15647 1 0 0 0 213 79
## 4951 1 0 0 0 199 102
## 23968 0 0 1 0 70 70
## 19777 1 0 0 0 188 96
## 59511 0 0 1 0 70 70
## 2995 1 0 0 0 136 136
## 5866 1 0 0 0 8211 2165
## 28426 0 1 0 0 66 66
## 3616 0 0 1 0 70 70
## 13391 1 0 0 0 27534 4859
## 39370 1 0 0 0 183 94
## 55447 1 0 0 0 186 102
## 24869 0 1 0 0 1345 1345
## 44665 1 0 0 0 290 102
## 63753 1 0 0 0 832 106
## 64667 0 1 0 0 62 62
## 16094 1 0 0 0 235 90
## 42002 1 0 0 0 70 70
## 33910 0 0 1 0 66 66
## 17145 1 0 0 0 183 94
## 46779 1 0 0 0 736 670
## 10282 1 0 0 0 3488 1774
## 67 1 0 0 0 13948 1150
## 35695 0 1 0 0 74 74
## 37981 1 0 0 0 158 158
## 13702 0 0 1 0 70 70
## 1194 1 0 0 0 177 94
## 61358 0 0 1 0 66 66
## 35140 1 0 0 0 177 94
## 10447 0 0 1 0 70 70
## 50583 1 0 0 0 1006 252
## 17473 0 0 1 0 70 70
## 55426 1 0 0 0 178 94
## 21111 1 0 0 0 3613 1594
## 55223 0 1 0 0 62 62
## 58596 1 0 0 0 11546 3604
## 36957 1 0 0 0 193 102
## 51457 0 0 1 0 70 70
## 2957 0 1 0 0 66 66
## 3828 1 0 0 0 62 62
## 23251 0 0 1 0 70 70
## 62058 1 0 0 0 4433 1526
## 12622 1 0 0 0 757 98
## 45778 1 0 0 0 196 102
## 44347 0 0 1 0 70 70
## 40350 0 1 0 0 60 60
## 4716 0 0 1 0 70 70
## 18161 1 0 0 0 168 86
## 40307 0 1 0 0 60 60
## 29537 0 1 0 0 66 66
## 26918 0 0 1 0 70 70
## 15757 0 1 0 0 60 60
## 63252 1 0 0 0 396 396
## 19603 0 1 0 0 66 66
## 41439 0 1 0 0 66 66
## 12858 1 0 0 0 661695 28430
## 38480 1 0 0 0 490 111
## 65135 1 0 0 0 49181 3553
## 9925 1 0 0 0 177 94
## 8855 0 1 0 0 62 62
## 12669 0 1 0 0 66 66
## 44887 0 0 1 0 66 66
## 58489 1 0 0 0 4994 379
## 2916 1 0 0 0 70 70
## 58527 1 0 0 0 66 66
## 32516 1 0 0 0 7941 1952
## 44165 1 0 0 0 177 94
## 28845 0 0 1 0 70 70
## 24255 1 0 0 0 1364 940
## 49754 0 0 1 0 70 70
## 10975 0 0 1 0 66 66
## 43106 1 0 0 0 211 102
## 126 1 0 0 0 70 70
## 30358 1 0 0 0 210 110
## 39835 0 0 1 0 70 70
## 11092 0 1 0 0 60 60
## 58970 1 0 0 0 261 78
## 38068 0 1 0 0 66 66
## 56544 1 0 0 0 216 110
## 49764 0 1 0 0 156 156
## 54740 1 0 0 0 3476 1433
## 63352 1 0 0 0 178 118
## 7364 0 1 0 0 62 62
## 44939 1 0 0 0 1165 662
## 10246 1 0 0 0 330 101
## 5061 1 0 0 0 314 192
## 59974 1 0 0 0 238 99
## 44699 0 0 1 0 66 66
## 31081 1 0 0 0 172 88
## 48377 1 0 0 0 184 94
## 51888 0 1 0 0 66 66
## 62738 1 0 0 0 9246 1808
## 62006 1 0 0 0 18360 1734
## 21966 1 0 0 0 23323 1589
## 42931 0 1 0 0 146 146
## 25772 1 0 0 0 184 94
## 49250 1 0 0 0 70 70
## 29343 0 1 0 0 146 146
## 46830 1 0 0 0 199 102
## 64366 1 0 0 0 212 94
## 32378 1 0 0 0 212 110
## 30177 1 0 0 0 7534 2392
## 63199 0 0 1 0 70 70
## 4908 1 0 0 0 271 95
## 50498 1 0 0 0 330 330
## 8854 0 1 0 0 62 62
## 63193 0 0 1 0 70 70
## 64109 1 0 0 0 4988930 161289
## 33814 0 1 0 0 66 66
## 56606 1 0 0 0 2338 1419
## 32064 0 1 0 0 145 145
## 53274 0 1 0 0 66 66
## 58640 0 1 0 0 66 66
## 13415 1 0 0 0 318 192
## 61401 1 0 0 0 66 66
## 40211 0 1 0 0 66 66
## 33513 1 0 0 0 233 91
## 22778 0 0 1 0 70 70
## 3392 1 0 0 0 1599 743
## 20397 1 0 0 0 1414 95
## 57570 1 0 0 0 548 112
## 32793 1 0 0 0 234 234
## 58987 1 0 0 0 5378 984
## 37694 1 0 0 0 1548 847
## 17478 0 1 0 0 62 62
## 37416 1 0 0 0 21063 13499
## 46727 1 0 0 0 3490 1923
## 59235 0 1 0 0 146 146
## 46854 1 0 0 0 168 86
## 16105 1 0 0 0 23467 16522
## 47560 0 0 1 0 70 70
## 64789 1 0 0 0 318 192
## 15460 1 0 0 0 376 94
## 56809 0 1 0 0 62 62
## 16910 0 1 0 0 62 62
## 53389 1 0 0 0 193 102
## 43035 1 0 0 0 1342 918
## 60235 0 1 0 0 66 66
## 41093 1 0 0 0 6441 1651
## 56126 1 0 0 0 1151 711
## 62730 1 0 0 0 3155 836
## 45910 1 0 0 0 66 66
## 41809 0 1 0 0 62 62
## 1267 1 0 0 0 12715 3716
## 19261 0 0 1 0 70 70
## 2440 1 0 0 0 338 110
## 648 0 1 0 0 62 62
## 53215 0 0 1 0 70 70
## 8936 1 0 0 0 2163 676
## 15448 1 0 0 0 16296 9143
## 15150 0 1 0 0 66 66
## 37710 1 0 0 0 201 80
## 25677 1 0 0 0 193 102
## 43700 1 0 0 0 4814 1207
## 12795 1 0 0 0 4216 771
## 59550 0 0 1 0 70 70
## 25684 1 0 0 0 550 113
## 41534 0 0 1 0 70 70
## 59342 1 0 0 0 214 140
## 47800 1 0 0 0 209 110
## 45862 1 0 0 0 7692 2183
## 38812 1 0 0 0 166 86
## 34105 1 0 0 0 66 66
## 40698 1 0 0 0 8251 2124
## 47109 1 0 0 0 196 100
## 6070 0 1 0 0 99 99
## 4936 1 0 0 0 209 110
## 48585 1 0 0 0 183 94
## 16349 1 0 0 0 5166 700
## 34766 1 0 0 0 4521 1614
## 30601 1 0 0 0 7280 2277
## 25224 1 0 0 0 214 110
## 40039 0 1 0 0 60 60
## 21238 0 1 0 0 60 60
## 38457 1 0 0 0 199 102
## 31129 1 0 0 0 168 86
## 45840 1 0 0 0 177 94
## 39498 0 0 1 0 70 70
## 937 1 0 0 0 9075 3727
## 41842 0 0 1 0 70 70
## 10481 0 1 0 0 60 60
## 9401 0 1 0 0 66 66
## 38591 0 0 1 0 70 70
## 32514 1 0 0 0 183 94
## 36438 1 0 0 0 177 94
## 46934 1 0 0 0 700 282
## 654 0 1 0 0 62 62
## 42409 0 0 1 0 66 66
## 45238 1 0 0 0 7701 2215
## 53974 0 0 1 0 70 70
## 20276 0 0 1 0 70 70
## 28562 1 0 0 0 2981 1719
## 60239 0 1 0 0 145 145
## 25939 0 1 0 0 74 74
## 53410 1 0 0 0 422 164
## 4500 1 0 0 0 183 94
## 15016 0 1 0 0 66 66
## 44394 0 1 0 0 66 66
## 22466 0 0 1 0 70 70
## 45696 0 1 0 0 60 60
## 37977 1 0 0 0 326 78
## 4418 1 0 0 0 6351 4202
## 14324 1 0 0 0 1778 1386
## 26789 1 0 0 0 392 102
## 58895 1 0 0 0 312 132
## 19359 1 0 0 0 5168 700
## 55053 1 0 0 0 8580 5765
## 23932 0 1 0 0 62 62
## 18862 1 0 0 0 196 102
## 33255 1 0 0 0 370 94
## 4635 0 1 0 0 66 66
## 57940 1 0 0 0 234 234
## 40995 0 1 0 0 78 78
## 17949 0 1 0 0 60 60
## 51586 1 0 0 0 234 234
## 50158 1 0 0 0 214 97
## 42441 0 0 1 0 70 70
## 23657 0 1 0 0 66 66
## 47732 1 0 0 0 733 86
## 30165 1 0 0 0 177 94
## 39466 0 1 0 0 88 88
## 129 1 0 0 0 70 70
## 9309 1 0 0 0 45487 4986
## 19029 0 1 0 0 66 66
## 62043 1 0 0 0 212 102
## 19374 1 0 0 0 214 110
## 48164 0 1 0 0 66 66
## 38602 0 0 1 0 70 70
## 45567 0 0 1 0 66 66
## 25998 1 0 0 0 70 70
## 8097 1 0 0 0 3910 1805
## 56250 0 1 0 0 62 62
## 21983 1 0 0 0 86 86
## 47031 0 0 1 0 66 66
## 22223 0 0 1 0 70 70
## 12560 1 0 0 0 70 70
## 3821 1 0 0 0 62 62
## 46484 0 0 1 0 66 66
## 16385 1 0 0 0 4898 1119
## 24859 0 0 1 0 70 70
## 35478 1 0 0 0 177 94
## 24834 0 0 1 0 66 66
## 44816 1 0 0 0 4453 1546
## 50037 0 1 0 0 62 62
## 57070 1 0 0 0 1833 991
## 31616 0 1 0 0 62 62
## 10860 0 0 1 0 70 70
## 1625 1 0 0 0 179 94
## 14913 1 0 0 0 29401 10036
## 12625 1 0 0 0 7755 4481
## 63031 1 0 0 0 35406 17785
## 35057 1 0 0 0 168 78
## 6705 0 1 0 0 74 74
## 62480 1 0 0 0 6404 829
## 59860 0 0 1 0 70 70
## 42139 0 0 1 0 70 70
## 43081 1 0 0 0 1130 613
## 59150 0 0 1 0 66 66
## 36476 1 0 0 0 766 248
## 43734 1 0 0 0 1634 126
## 6615 1 0 0 0 3019 716
## 10184 1 0 0 0 9276 2422
## 9778 0 0 1 0 70 70
## 20712 1 0 0 0 1907736 31488
## 13640 1 0 0 0 184 94
## 59389 1 0 0 0 2872 717
## 5523 1 0 0 0 210 110
## 29064 1 0 0 0 190013 14784
## 27924 1 0 0 0 4737 2854
## 14337 1 0 0 0 7731 2016
## 48881 1 0 0 0 180 94
## 50532 0 1 0 0 66 66
## 55425 1 0 0 0 194 102
## 11647 1 0 0 0 70 70
## 2860 1 0 0 0 7476 2192
## 28080 0 0 1 0 70 70
## 29785 1 0 0 0 1642 822
## 39675 1 0 0 0 606 300
## 12618 1 0 0 0 740 93
## 11206 1 0 0 0 288 93
## 50716 0 0 1 0 70 70
## 24100 1 0 0 0 151233 12592
## 7677 0 1 0 0 62 62
## 17044 1 0 0 0 206 105
## 41904 0 1 0 0 62 62
## 60500 1 0 0 0 70 70
## 23255 0 0 1 0 66 66
## 15255 0 0 1 0 70 70
## 38270 0 0 1 0 70 70
## 60237 0 1 0 0 66 66
## 40274 0 0 1 0 66 66
## 34028 1 0 0 0 167 86
## 24954 1 0 0 0 3586 1074
## 24440 0 0 1 0 70 70
## 4543 1 0 0 0 183 94
## 8889 0 1 0 0 66 66
## 26404 0 0 1 0 70 70
## 1332 0 0 1 0 70 70
## 64088 1 0 0 0 4317 1134
## 22392 0 1 0 0 66 66
## 64184 0 0 1 0 70 70
## 3122 1 0 0 0 183 94
## 17259 1 0 0 0 212 102
## 35015 1 0 0 0 318 192
## 34728 0 0 1 0 66 66
## 29325 0 1 0 0 62 62
## 56196 0 0 1 0 70 70
## 9522 1 0 0 0 223 78
## 8004 0 1 0 0 66 66
## 41413 0 0 1 0 70 70
## 19198 1 0 0 0 8407 3736
## 59999 1 0 0 0 168 78
## 47586 0 1 0 0 66 66
## 40981 0 0 1 0 66 66
## 24452 0 0 1 0 70 70
## 6731 0 1 0 0 62 62
## 13483 1 0 0 0 166 86
## 60650 0 1 0 0 66 66
## 30117 1 0 0 0 37440 7952
## 21332 0 0 1 0 70 70
## 37240 0 1 0 0 66 66
## 31307 1 0 0 0 6698 1606
## 19466 1 0 0 0 165 86
## 16111 1 0 0 0 80933 20860
## 41387 0 1 0 0 60 60
## 18679 1 0 0 0 7389 1786
## 50369 0 1 0 0 66 66
## 41349 0 1 0 0 146 146
## 29521 0 0 1 0 70 70
## 13011 0 0 1 0 66 66
## 7192 0 1 0 0 62 62
## 45024 1 0 0 0 199 94
## 65239 1 0 0 0 254281 66535
## 25580 0 0 1 0 70 70
## 37836 1 0 0 0 209 102
## 31840 1 0 0 0 5168 700
## 22142 0 1 0 0 162 162
## 62123 1 0 0 0 211 102
## 12840 1 0 0 0 7685 1016
## 25670 1 0 0 0 6982 1728
## 31825 1 0 0 0 321 108
## 25728 1 0 0 0 214 110
## 65151 0 0 1 0 70 70
## 48911 1 0 0 0 199 102
## 57171 1 0 0 0 211 110
## 4733 0 1 0 0 62 62
## 55275 0 0 1 0 66 66
## 49213 0 1 0 0 66 66
## 10523 0 0 1 0 70 70
## 20474 1 0 0 0 374 240
## 23319 1 0 0 0 9262 2792
## 37131 0 0 1 0 70 70
## 44281 1 0 0 0 166 166
## 34899 0 0 1 0 66 66
## 25149 1 0 0 0 199 102
## 10468 0 1 0 0 60 60
## 968 0 1 0 0 62 62
## 59957 0 0 1 0 66 66
## 26999 1 0 0 0 2898 1485
## 4953 1 0 0 0 198 102
## 16950 0 1 0 0 66 66
## 43154 0 1 0 0 148 148
## 10027 0 0 1 0 66 66
## 45632 1 0 0 0 2148 918
## 38163 0 1 0 0 129 129
## 38768 0 0 1 0 70 70
## 41171 1 0 0 0 83683 2747
## 6703 0 0 1 0 66 66
## 39721 1 0 0 0 199 102
## 61720 0 1 0 0 66 66
## 29920 0 0 1 0 70 70
## 38698 1 0 0 0 10781 3635
## 32774 1 0 0 0 8252 1702
## 6876 0 1 0 0 145 145
## 17938 0 1 0 0 60 60
## 29961 0 1 0 0 146 146
## 37801 1 0 0 0 214 110
## 64428 1 0 0 0 177 94
## 46086 1 0 0 0 170 87
## 23041 1 0 0 0 168 78
## 42350 1 0 0 0 199 94
## 348 1 0 0 0 2901 1407
## 40766 1 0 0 0 6084 1842
## 31159 1 0 0 0 9737 3353
## 18513 0 1 0 0 62 62
## 14618 1 0 0 0 294 94
## 39559 0 1 0 0 62 62
## 5038 1 0 0 0 199 102
## 48438 1 0 0 0 196 102
## 1831 0 1 0 0 83 83
## 35323 0 1 0 0 66 66
## 37857 1 0 0 0 8025 3048
## 7285 0 1 0 0 62 62
## 16144 0 0 1 0 70 70
## 47434 1 0 0 0 195 94
## 9231 0 0 1 0 70 70
## 45212 0 1 0 0 66 66
## 18148 1 0 0 0 747 462
## 43671 0 1 0 0 66 66
## 5536 1 0 0 0 199 102
## 62174 1 0 0 0 6351 1370
## 38306 1 0 0 0 5312 1510
## 40841 1 0 0 0 294 86
## 60676 0 0 1 0 66 66
## 10502 0 0 1 0 66 66
## 37436 1 0 0 0 7165 1312
## 4795 1 0 0 0 240 76
## 6256 1 0 0 0 5814771 76275
## 37313 1 0 0 0 754 94
## 50684 0 0 1 0 70 70
## 34386 1 0 0 0 708 384
## 52359 1 0 0 0 235 90
## 9890 1 0 0 0 230 102
## 65303 1 0 0 0 5870176 83236
## 20505 1 0 0 0 194 102
## 31820 1 0 0 0 70 70
## 40972 0 0 1 0 70 70
## 37810 1 0 0 0 78 78
## 54407 0 1 0 0 62 62
## 28776 0 0 1 0 70 70
## 30685 1 0 0 0 630 508
## 39729 1 0 0 0 450 246
## 64756 1 0 0 0 7978844 97517
## 2159 1 0 0 0 25125 2681
## 43849 1 0 0 0 168 78
## 43733 1 0 0 0 1624 126
## 55828 1 0 0 0 8290 2156
## 39333 1 0 0 0 209 110
## 42519 0 1 0 0 146 146
## 21231 1 0 0 0 2895 1646
## 4956 1 0 0 0 193 102
## 42738 1 0 0 0 214 140
## 12737 1 0 0 0 4458 1022
## 17356 0 0 1 0 70 70
## 21136 1 0 0 0 275 96
## 56176 1 0 0 0 177 94
## 47194 1 0 0 0 198 93
## 42498 0 0 1 0 70 70
## 47156 1 0 0 0 202 103
## 25443 1 0 0 0 184 94
## 50638 1 0 0 0 486 240
## 36705 1 0 0 0 164 84
## 11590 0 1 0 0 62 62
## 48757 1 0 0 0 572 292
## 10712 1 0 0 0 455 102
## 29445 1 0 0 0 5166 700
## 54553 0 1 0 0 60 60
## 56357 1 0 0 0 751 95
## 29759 1 0 0 0 1899 1310
## 40385 1 0 0 0 8256 3157
## 60274 0 1 0 0 62 62
## 38239 0 0 1 0 66 66
## 44772 0 1 0 0 62 62
## 46155 1 0 0 0 3363 797
## 60110 1 0 0 0 130 70
## 44915 1 0 0 0 29039 3057
## 22420 0 0 1 0 66 66
## 46789 1 0 0 0 199 102
## 62768 0 0 1 0 70 70
## 41684 1 0 0 0 132 132
## 11564 0 1 0 0 60 60
## 38847 1 0 0 0 219 98
## 21965 1 0 0 0 132 70
## 37530 0 1 0 0 66 66
## 16743 1 0 0 0 178 94
## 41515 1 0 0 0 3751 1007
## 543 1 0 0 0 177 94
## 3988 1 0 0 0 184 94
## 57777 1 0 0 0 499 425
## 25542 0 1 0 0 62 62
## 58279 0 1 0 0 62 62
## 35155 1 0 0 0 782 428
## 5340 1 0 0 0 10454 1724
## 27236 1 0 0 0 3244 1630
## 44362 0 1 0 0 62 62
## 32797 1 0 0 0 211 102
## 52902 1 0 0 0 21288 10113
## 42171 0 1 0 0 66 66
## 3124 1 0 0 0 177 94
## 12614 1 0 0 0 168 86
## 65506 1 0 0 0 5776 1880
## 18576 0 1 0 0 66 66
## 53355 1 0 0 0 194 99
## 42124 0 1 0 0 62 62
## 38205 1 0 0 0 186 95
## 31562 0 1 0 0 62 62
## 56730 0 1 0 0 66 66
## 1160 1 0 0 0 184 94
## 46391 0 1 0 0 62 62
## 7486 0 1 0 0 62 62
## 54079 1 0 0 0 366 180
## 5158 0 1 0 0 66 66
## 35908 0 0 1 0 70 70
## 37132 0 0 1 0 70 70
## 2820 0 0 1 0 70 70
## 13283 1 0 0 0 165 86
## 23624 0 1 0 0 62 62
## 56373 1 0 0 0 3354 1792
## 57240 1 0 0 0 70 70
## 59170 0 0 1 0 70 70
## 21379 0 1 0 0 66 66
## 5372 1 0 0 0 176 90
## 33558 1 0 0 0 4427 1520
## 5211 0 0 1 0 66 66
## 48266 1 0 0 0 172 88
## 5001 1 0 0 0 367162 17367
## 53703 1 0 0 0 7149 1719
## 2760 0 1 0 0 62 62
## 42501 0 0 1 0 66 66
## 13955 0 0 1 0 70 70
## 38479 1 0 0 0 4305 699
## 2007 0 1 0 0 62 62
## 826 1 0 0 0 558 210
## 52165 0 0 1 0 70 70
## 55552 1 0 0 0 196 102
## 51633 1 0 0 0 323 150
## 37398 1 0 0 0 178 94
## 45954 0 0 1 0 66 66
## 57835 0 1 0 0 66 66
## 5349 1 0 0 0 70 70
## 22697 1 0 0 0 130 70
## 56202 0 0 1 0 70 70
## 13350 1 0 0 0 35970 11674
## 8258 1 0 0 0 295 90
## 48253 1 0 0 0 204 64
## 20797 0 1 0 0 66 66
## 248 0 0 1 0 70 70
## 26891 0 0 1 0 70 70
## 63302 1 0 0 0 199 102
## 4057 1 0 0 0 2192 1049
## 29021 1 0 0 0 14233438 253696
## 26932 0 1 0 0 60 60
## 22840 0 0 1 0 70 70
## 6230 1 0 0 0 217 90
## 41527 0 1 0 0 146 146
## 19558 1 0 0 0 870 330
## 30532 0 0 1 0 70 70
## 30065 1 0 0 0 318 192
## 44966 1 0 0 0 10325 2594
## 49462 1 0 0 0 4962 887
## 31542 0 0 1 0 70 70
## 20933 1 0 0 0 5574 999
## 12377 0 1 0 0 60 60
## 16459 0 1 0 0 62 62
## 16917 1 0 0 0 198 128
## 48849 1 0 0 0 244 98
## 58835 0 1 0 0 66 66
## 33042 0 0 1 0 70 70
## 40843 1 0 0 0 4398 1491
## 7860 0 1 0 0 62 62
## 5928 1 0 0 0 225 110
## 61501 1 0 0 0 9597 2488
## 63685 1 0 0 0 4626083 113708
## 14451 0 0 1 0 70 70
## 8193 1 0 0 0 366 240
## 63312 1 0 0 0 138 78
## 40372 0 1 0 0 66 66
## 59106 0 0 1 0 70 70
## 57863 0 1 0 0 62 62
## 5425 0 0 1 0 66 66
## 18274 1 0 0 0 7274 1925
## 9576 1 0 0 0 70 70
## 45003 1 0 0 0 70 70
## 61281 0 1 0 0 66 66
## 55213 0 1 0 0 66 66
## 42753 1 0 0 0 30057 13955
## 5847 0 0 1 0 70 70
## 10623 0 0 1 0 70 70
## 7664 0 1 0 0 62 62
## 3982 1 0 0 0 260 102
## 62328 1 0 0 0 9616 6956
## 7794 0 1 0 0 62 62
## 54274 1 0 0 0 177 94
## 11335 1 0 0 0 3642 421
## 22825 0 0 1 0 70 70
## Bytes.Received Packets Elapsed.Time..sec. pkts_sent pkts_received
## 4519 7140 60 410 27 33
## 13921 0 1 0 1 0
## 21316 0 1 0 1 0
## 61397 0 1 5 1 0
## 59091 0 1 0 1 0
## 61449 6900 26 362 12 14
## 21342 0 1 0 1 0
## 10124 430 12 81 7 5
## 62762 0 6 1199 6 0
## 42177 0 1 0 1 0
## 21317 0 1 0 1 0
## 2656 0 1 0 1 0
## 35422 87 2 30 1 1
## 4781 89 2 30 1 1
## 55032 83 2 30 1 1
## 2793 0 1 0 1 0
## 63544 0 1 0 1 0
## 1966 258 9 21 5 4
## 58168 132 2 30 1 1
## 37824 1034 14 151 8 6
## 48250 1190 13 83 7 6
## 41867 12913 23 124 9 14
## 32808 0 2 40 2 0
## 60722 0 1 0 1 0
## 12209 101 2 30 1 1
## 58810 6290 49 115 22 27
## 43979 0 1 0 1 0
## 36938 202 4 31 2 2
## 14813 0 1 0 1 0
## 59979 143 2 32 1 1
## 55479 106 2 30 1 1
## 41314 380 2 1200 1 1
## 22837 0 1 0 1 0
## 56850 4511 25 16 13 12
## 7337 0 1 0 1 0
## 23632 0 1 0 1 0
## 16543 1263 23 27 13 10
## 61906 0 1 0 1 0
## 64959 0 1 0 1 0
## 32094 0 1 0 1 0
## 38121 0 1 0 1 0
## 48018 745 2 31 1 1
## 32945 0 1 0 1 0
## 50828 9235 31 82 17 14
## 15931 0 1 0 1 0
## 4392 952 10 15 6 4
## 62548 5509 38 90 19 19
## 28973 0 1 4 1 0
## 63723 0 8 44 8 0
## 52780 0 1 0 1 0
## 47578 0 1 0 1 0
## 22171 1775 15 15 8 7
## 50173 209 2 30 1 1
## 22596 0 1 4 1 0
## 41014 0 1 0 1 0
## 37644 0 1 0 1 0
## 44523 0 1 0 1 0
## 40851 258 6 30 3 3
## 2252 0 1 0 1 0
## 43215 0 1 0 1 0
## 6227 76349 103 16 48 55
## 3920 105 2 29 1 1
## 60301 0 1 0 1 0
## 3081 90 2 29 1 1
## 8570 613 2 30 1 1
## 65470 0 1 5 1 0
## 28805 0 1 0 1 0
## 41110 0 1 0 1 0
## 25385 0 1 0 1 0
## 44242 206 8 26 5 3
## 35075 0 1 0 1 0
## 32480 78 2 30 1 1
## 54476 613 2 30 1 1
## 51675 0 1 0 1 0
## 44625 123 2 29 1 1
## 45247 930 19 34 11 8
## 39488 0 1 0 1 0
## 8487 0 1 0 1 0
## 5842 91 2 30 1 1
## 23741 737 2 30 1 1
## 59676 119 2 29 1 1
## 58646 0 1 0 1 0
## 28429 0 1 0 1 0
## 64070 122 2 30 1 1
## 17301 95 2 31 1 1
## 10147 7795 41 150 21 20
## 34908 0 1 0 1 0
## 13079 0 1 0 1 0
## 25136 80 2 30 1 1
## 53449 0 2 8 2 0
## 17039 223 2 31 1 1
## 65011 0 1 0 1 0
## 30757 93 2 30 1 1
## 32937 0 1 0 1 0
## 61594 450 12 78 7 5
## 41164 5273 22 129 13 9
## 37252 91 2 30 1 1
## 31019 0 1 0 1 0
## 41787 0 1 0 1 0
## 30595 90 2 31 1 1
## 32431 101 2 30 1 1
## 26499 0 1 0 1 0
## 30173 6429 27 135 15 12
## 55813 5084 24 16 13 11
## 62233 0 1 0 1 0
## 51031 0 1 0 1 0
## 23204 0 1 0 1 0
## 23677 0 1 0 1 0
## 45799 6388 16 15 10 6
## 54341 0 1 0 1 0
## 26900 0 1 0 1 0
## 51885 126 2 31 1 1
## 1749 0 1 0 1 0
## 64900 112 2 30 1 1
## 58048 717 2 30 1 1
## 57868 0 1 0 1 0
## 29692 720 2 31 1 1
## 23808 883 2 30 1 1
## 8669 4034 92 626 50 42
## 55070 0 2 8 2 0
## 4420 3663 59 122 33 26
## 4438 209 4 32 2 2
## 13451 81 2 30 1 1
## 20527 7319 22 78 10 12
## 11491 126 7 25 5 2
## 2340 0 1 0 1 0
## 42925 0 1 0 1 0
## 44740 0 1 0 1 0
## 28485 34626 136 47 63 73
## 37233 102 2 31 1 1
## 57552 186 8 15 5 3
## 4022 0 1 4 1 0
## 27255 0 1 0 1 0
## 40267 0 1 0 1 0
## 22745 0 1 0 1 0
## 18627 704 2 30 1 1
## 21567 748 2 30 1 1
## 14724 0 2 8 2 0
## 63564 0 1 0 1 0
## 41864 34735 58 19 27 31
## 58136 93 2 30 1 1
## 2761 0 1 0 1 0
## 63672 382810 411 143 151 260
## 53764 0 1 0 1 0
## 21811 2476 25 18 13 12
## 9941 767 11 18 7 4
## 25445 0 1 30 1 0
## 9207 0 1 0 1 0
## 22495 0 1 0 1 0
## 60133 2169 22 21 13 9
## 27618 198 2 30 1 1
## 64536 0 1 0 1 0
## 59619 69 2 30 1 1
## 33984 0 1 0 1 0
## 46358 7361 26 186 14 12
## 36883 90 2 30 1 1
## 54999 0 1 0 1 0
## 37217 659 2 31 1 1
## 29937 0 1 0 1 0
## 58791 0 1 5 1 0
## 9019 174 2 30 1 1
## 27556 260551 574 158 255 319
## 50951 763 2 30 1 1
## 53670 0 1 0 1 0
## 1998 0 1 0 1 0
## 5556 7325 40 15 24 16
## 34498 2653 13 121 6 7
## 8791 0 1 0 1 0
## 35198 4085 16 145 10 6
## 49599 119 2 29 1 1
## 10546 828 11 61 6 5
## 7832 0 1 0 1 0
## 5724 0 1 0 1 0
## 53065 2495646 2678 84 986 1692
## 55222 0 1 0 1 0
## 60563 0 1 0 1 0
## 6649 0 2 8 2 0
## 16201 0 1 0 1 0
## 37112 0 1 0 1 0
## 39159 180 6 6 3 3
## 50280 124 3 11 1 2
## 21664 0 1 0 1 0
## 31355 93 2 30 1 1
## 33033 0 1 0 1 0
## 38411 35391 54 54 26 28
## 36913 3525 30 23 17 13
## 58817 0 8 3633 8 0
## 9343 258 6 30 3 3
## 39610 60 2 15 1 1
## 41479 0 1 0 1 0
## 36548 0 1 0 1 0
## 9626 0 1 0 1 0
## 48451 7548 79 19 40 39
## 56007 97 2 30 1 1
## 42634 3473 39 97 21 18
## 63808 19280 35 17 16 19
## 25140 98 2 30 1 1
## 49492 194 4 19 1 3
## 3807 93 2 31 1 1
## 13077 0 1 0 1 0
## 18195 97 2 29 1 1
## 50565 5479 18 15 9 9
## 38529 96 2 30 1 1
## 508 0 1 5 1 0
## 16306 626 2 30 1 1
## 10393 0 1 0 1 0
## 30229 202 4 47 2 2
## 34807 74 7 15 6 1
## 27594 0 22 1526 22 0
## 21686 0 1 0 1 0
## 63632 0 1 0 1 0
## 31394 6293 27 150 14 13
## 51622 755 2 30 1 1
## 23887 5955 18 135 8 10
## 61223 7237 23 52 11 12
## 27278 0 1 0 1 0
## 44310 0 1 0 1 0
## 15214 0 1 0 1 0
## 49693 74 3 6 2 1
## 9004 927658 1244 141 563 681
## 63308 0 2 7 2 0
## 20938 88 2 30 1 1
## 14241 126 6 26 4 2
## 39693 1519008 1758 185 656 1102
## 56361 8222 23 18 14 9
## 42726 5411 25 315 12 13
## 6999 0 1 0 1 0
## 56448 0 1 0 1 0
## 51193 145 2 29 1 1
## 10311 142 2 30 1 1
## 53946 0 1 0 1 0
## 63100 0 1 0 1 0
## 32192 126 7 21 5 2
## 17834 1021 19 38 11 8
## 11840 1640 15 1200 8 7
## 18396 95 2 30 1 1
## 36974 180 6 7 3 3
## 51924 755 2 30 1 1
## 1154 33203 71 60 41 30
## 37419 809 18 43 10 8
## 12061 60 2 5 1 1
## 47095 1139 17 142 10 7
## 20634 92 2 31 1 1
## 11071 0 1 0 1 0
## 34051 60 2 14 1 1
## 41868 9716 40 18 23 17
## 20200 98 2 30 1 1
## 43393 0 1 0 1 0
## 50015 0 1 0 1 0
## 19855 0 1 0 1 0
## 19289 0 1 0 1 0
## 59568 0 1 0 1 0
## 20456 0 1 0 1 0
## 51753 0 1 0 1 0
## 56755 0 1 0 1 0
## 1396 0 1 0 1 0
## 24703 3332 27 27 15 12
## 47162 0 1 90 1 0
## 41100 0 1 0 1 0
## 43756 0 1 5 1 0
## 47463 174 2 30 1 1
## 36910 106 2 30 1 1
## 21523 3952 22 17 13 9
## 51106 0 1 0 1 0
## 36874 97 2 30 1 1
## 24069 82 2 30 1 1
## 48118 0 1 0 1 0
## 35405 795 19 69 10 9
## 4221 0 1 0 1 0
## 52109 0 1 0 1 0
## 15172 0 1 0 1 0
## 13853 85 2 30 1 1
## 45875 37300 77 54 36 41
## 13064 0 1 0 1 0
## 13594 0 1 0 1 0
## 18344 89 2 30 1 1
## 38000 379 2 1199 1 1
## 55635 22597986 24754 320 8113 16641
## 4484 62 2 30 1 1
## 3323 90 2 30 1 1
## 17453 0 1 0 1 0
## 56203 0 1 0 1 0
## 45239 0 1 5 1 0
## 58768 160 2 30 1 1
## 33224 0 1 5 1 0
## 48820 0 1 0 1 0
## 49940 0 2 7 2 0
## 49139 966 13 61 7 6
## 28207 0 1 0 1 0
## 23732 5817 29 391 10 19
## 20414 92 2 31 1 1
## 2857 6020 20 15 11 9
## 34688 12034 25 31 11 14
## 47876 140 6 27 4 2
## 44706 0 1 0 1 0
## 7466 0 1 0 1 0
## 299 180 6 6 3 3
## 13880 176 2 1200 1 1
## 41753 0 1 0 1 0
## 8882 0 1 0 1 0
## 19941 97 2 29 1 1
## 32088 0 1 0 1 0
## 6832 0 1 0 1 0
## 42415 0 1 0 1 0
## 65526 238 2 30 1 1
## 2895 104 2 31 1 1
## 58580 89 2 30 1 1
## 25475 100 2 31 1 1
## 65009 0 1 0 1 0
## 10315 95 2 30 1 1
## 53370 2051 32 361 18 14
## 35060 144 2 31 1 1
## 60614 0 1 0 1 0
## 50106 0 1 0 1 0
## 8385 0 1 0 1 0
## 10103 0 1 0 1 0
## 65059 0 1 0 1 0
## 42426 0 1 0 1 0
## 26704 662 16 94 9 7
## 13729 355 10 255 6 4
## 5790 99851 94 15 24 70
## 26589 0 1 0 1 0
## 16578 0 3 8 3 0
## 4490 91 2 31 1 1
## 21604 415 11 342 6 5
## 18982 0 1 0 1 0
## 13694 0 1 0 1 0
## 50725 0 1 0 1 0
## 39406 0 1 0 1 0
## 30599 84 2 31 1 1
## 43405 0 1 0 1 0
## 39561 0 1 0 1 0
## 9926 0 1 30 1 0
## 9780 0 1 0 1 0
## 52308 6252 32 109 17 15
## 9877 288 2 30 1 1
## 26063 5519 23 87 12 11
## 53399 5167 17 27 9 8
## 9951 5621 31 20 17 14
## 56439 0 1 0 1 0
## 62318 58120 112 44 39 73
## 50879 74 4 30 3 1
## 36016 168 4 35 2 2
## 22278 0 1 4 1 0
## 48370 114348 200 84 101 99
## 57086 5589 24 201 13 11
## 22735 0 1 0 1 0
## 6738 0 1 0 1 0
## 20708 239 4 52 2 2
## 46397 0 1 0 1 0
## 22130 0 1 0 1 0
## 40081 149 2 30 1 1
## 6501 2472 10 120 5 5
## 18037 0 1 0 1 0
## 18950 87 2 31 1 1
## 43834 82 2 29 1 1
## 60049 3992 42 186 22 20
## 60588 0 1 0 1 0
## 62148 74 3 11 2 1
## 3520 254 2 30 1 1
## 21810 454 15 39 10 5
## 44472 0 4 12 4 0
## 37859 438835 488 66 185 303
## 29802 189 4 31 2 2
## 33632 0 1 5 1 0
## 29501 0 1 5 1 0
## 46373 93 2 30 1 1
## 28722 0 1 0 1 0
## 56963 82 2 29 1 1
## 35515 114 2 30 1 1
## 7642 0 1 0 1 0
## 9193 0 1 0 1 0
## 50382 0 1 0 1 0
## 27418 29974 45 123 18 27
## 52448 0 1 0 1 0
## 23072 93 2 30 1 1
## 30147 0 1 0 1 0
## 44119 1614 7 120 3 4
## 64169 74 4 0 3 1
## 12619 668 2 31 1 1
## 28265 1614 7 120 3 4
## 6073 0 1 0 1 0
## 65152 0 1 0 1 0
## 52381 0 1 0 1 0
## 18375 93 2 31 1 1
## 24541 4467 16 15 7 9
## 34697 0 1 0 1 0
## 54059 5186 24 76 13 11
## 59260 568843 497 38 114 383
## 27153 180 6 6 3 3
## 61217 270 6 31 3 3
## 17470 0 1 0 1 0
## 41021 0 1 0 1 0
## 62674 1311 18 17 9 9
## 47772 81 2 29 1 1
## 6539 146 3 33 2 1
## 39436 0 1 0 1 0
## 49383 0 1 0 1 0
## 30540 0 1 0 1 0
## 62053 92 2 30 1 1
## 17416 0 1 0 1 0
## 7008 0 1 0 1 0
## 57263 0 1 0 1 0
## 37999 4956 16 127 7 9
## 43125 126 6 26 4 2
## 10021 0 1 0 1 0
## 5759 126 7 26 5 2
## 3543 7429 26 93 11 15
## 42108 0 1 0 1 0
## 10333 719 17 36 9 8
## 22250 0 1 0 1 0
## 63362 0 2 8 2 0
## 42107 0 1 0 1 0
## 37885 99 2 29 1 1
## 48353 91 2 30 1 1
## 56872 4453 23 16 13 10
## 45439 774 2 31 1 1
## 9708 0 1 0 1 0
## 52345 5057 38 20 21 17
## 17467 0 1 0 1 0
## 53809 0 1 5 1 0
## 49239 0 1 0 1 0
## 43093 0 2 8 2 0
## 32259 0 1 0 1 0
## 12191 267 6 30 3 3
## 15647 134 2 33 1 1
## 4951 97 2 30 1 1
## 23968 0 1 0 1 0
## 19777 92 2 31 1 1
## 59511 0 1 0 1 0
## 2995 0 3 13 3 0
## 5866 6046 25 361 13 12
## 28426 0 1 0 1 0
## 3616 0 1 0 1 0
## 13391 22675 34 41 14 20
## 39370 89 2 30 1 1
## 55447 84 2 29 1 1
## 24869 0 1 0 1 0
## 44665 188 2 30 1 1
## 63753 726 2 30 1 1
## 64667 0 1 0 1 0
## 16094 145 2 33 1 1
## 42002 0 2 8 2 0
## 33910 0 1 0 1 0
## 17145 89 2 30 1 1
## 46779 66 6 15 5 1
## 10282 1714 17 32 9 8
## 67 12798 29 15 14 15
## 35695 0 1 0 1 0
## 37981 0 1 1199 1 0
## 13702 0 1 0 1 0
## 1194 83 2 30 1 1
## 61358 0 1 0 1 0
## 35140 83 2 30 1 1
## 10447 0 1 0 1 0
## 50583 754 4 32 3 1
## 17473 0 1 0 1 0
## 55426 84 2 29 1 1
## 21111 2019 19 18 10 9
## 55223 0 1 0 1 0
## 58596 7942 38 120 20 18
## 36957 91 2 31 1 1
## 51457 0 1 0 1 0
## 2957 0 1 0 1 0
## 3828 0 1 5 1 0
## 23251 0 1 0 1 0
## 62058 2907 19 27 11 8
## 12622 659 2 31 1 1
## 45778 94 2 30 1 1
## 44347 0 1 0 1 0
## 40350 0 1 0 1 0
## 4716 0 1 0 1 0
## 18161 82 2 31 1 1
## 40307 0 1 0 1 0
## 29537 0 1 0 1 0
## 26918 0 1 0 1 0
## 15757 0 1 0 1 0
## 63252 0 7 3616 7 0
## 19603 0 1 0 1 0
## 41439 0 1 0 1 0
## 12858 633265 779 118 335 444
## 38480 379 4 54 2 2
## 65135 45628 65 46 29 36
## 9925 83 2 30 1 1
## 8855 0 1 0 1 0
## 12669 0 1 0 1 0
## 44887 0 1 0 1 0
## 58489 4615 9 15 4 5
## 2916 0 2 8 2 0
## 58527 0 2 7 2 0
## 32516 5989 20 16 11 9
## 44165 83 2 30 1 1
## 28845 0 1 0 1 0
## 24255 424 12 16 7 5
## 49754 0 1 0 1 0
## 10975 0 1 0 1 0
## 43106 109 2 30 1 1
## 126 0 1 5 1 0
## 30358 100 2 31 1 1
## 39835 0 1 0 1 0
## 11092 0 1 0 1 0
## 58970 183 2 30 1 1
## 38068 0 1 0 1 0
## 56544 106 2 30 1 1
## 49764 0 1 0 1 0
## 54740 2043 26 418 14 12
## 63352 60 2 15 1 1
## 7364 0 1 0 1 0
## 44939 503 11 25 6 5
## 10246 229 4 47 2 2
## 5061 122 6 15 4 2
## 59974 139 2 30 1 1
## 44699 0 1 0 1 0
## 31081 84 2 30 1 1
## 48377 90 2 29 1 1
## 51888 0 1 0 1 0
## 62738 7438 35 92 17 18
## 62006 16626 32 28 13 19
## 21966 21734 27 85 10 17
## 42931 0 1 0 1 0
## 25772 90 2 30 1 1
## 49250 0 1 4 1 0
## 29343 0 1 0 1 0
## 46830 97 2 30 1 1
## 64366 118 2 30 1 1
## 32378 102 2 30 1 1
## 30177 5142 25 25 14 11
## 63199 0 1 0 1 0
## 4908 176 4 47 2 2
## 50498 0 6 3609 6 0
## 8854 0 1 0 1 0
## 63193 0 1 0 1 0
## 64109 4827641 5384 290 1964 3420
## 33814 0 1 0 1 0
## 56606 919 10 15 5 5
## 32064 0 1 0 1 0
## 53274 0 1 0 1 0
## 58640 0 1 0 1 0
## 13415 126 6 43 4 2
## 61401 0 1 5 1 0
## 40211 0 1 0 1 0
## 33513 142 2 30 1 1
## 22778 0 1 0 1 0
## 3392 856 13 36 7 6
## 20397 1319 4 41 2 2
## 57570 436 4 36 2 2
## 32793 0 4 12 4 0
## 58987 4394 18 17 10 8
## 37694 701 10 15 5 5
## 17478 0 1 0 1 0
## 37416 7564 47 68 25 22
## 46727 1567 14 16 7 7
## 59235 0 1 0 1 0
## 46854 82 2 30 1 1
## 16105 6945 51 161 29 22
## 47560 0 1 0 1 0
## 64789 126 6 26 4 2
## 15460 282 2 30 1 1
## 56809 0 1 0 1 0
## 16910 0 1 0 1 0
## 53389 91 2 31 1 1
## 43035 424 10 25 6 4
## 60235 0 1 0 1 0
## 41093 4790 22 53 12 10
## 56126 440 10 15 5 5
## 62730 2319 13 135 6 7
## 45910 0 2 8 2 0
## 41809 0 1 0 1 0
## 1267 8999 28 62 16 12
## 19261 0 1 0 1 0
## 2440 228 2 29 1 1
## 648 0 1 0 1 0
## 53215 0 1 0 1 0
## 8936 1487 14 15 7 7
## 15448 7153 48 27 26 22
## 15150 0 1 0 1 0
## 37710 121 2 30 1 1
## 25677 91 2 30 1 1
## 43700 3607 17 75 10 7
## 12795 3445 16 26 9 7
## 59550 0 1 0 1 0
## 25684 437 4 46 2 2
## 41534 0 1 0 1 0
## 59342 74 4 26 3 1
## 47800 99 2 29 1 1
## 45862 5509 24 224 14 10
## 38812 80 2 29 1 1
## 34105 0 2 8 2 0
## 40698 6127 21 16 11 10
## 47109 96 2 30 1 1
## 6070 0 1 0 1 0
## 4936 99 2 30 1 1
## 48585 89 2 31 1 1
## 16349 4466 16 15 7 9
## 34766 2907 19 26 11 8
## 30601 5003 19 136 9 10
## 25224 104 2 30 1 1
## 40039 0 1 0 1 0
## 21238 0 1 0 1 0
## 38457 97 2 30 1 1
## 31129 82 3 44 2 1
## 45840 83 2 29 1 1
## 39498 0 1 0 1 0
## 937 5348 38 50 18 20
## 41842 0 1 0 1 0
## 10481 0 1 0 1 0
## 9401 0 1 0 1 0
## 38591 0 1 0 1 0
## 32514 89 2 31 1 1
## 36438 83 2 30 1 1
## 46934 418 8 81 4 4
## 654 0 1 0 1 0
## 42409 0 1 0 1 0
## 45238 5486 28 373 14 14
## 53974 0 1 0 1 0
## 20276 0 1 0 1 0
## 28562 1262 17 79 10 7
## 60239 0 1 0 1 0
## 25939 0 1 0 1 0
## 53410 258 6 30 3 3
## 4500 89 2 31 1 1
## 15016 0 1 0 1 0
## 44394 0 1 0 1 0
## 22466 0 1 0 1 0
## 45696 0 1 0 1 0
## 37977 248 2 31 1 1
## 4418 2149 35 94 18 17
## 14324 392 8 31 4 4
## 26789 290 2 30 1 1
## 58895 180 6 6 3 3
## 19359 4468 16 16 7 9
## 55053 2815 46 82 22 24
## 23932 0 1 0 1 0
## 18862 94 2 29 1 1
## 33255 276 2 30 1 1
## 4635 0 1 0 1 0
## 57940 0 4 12 4 0
## 40995 0 1 0 1 0
## 17949 0 1 0 1 0
## 51586 0 4 12 4 0
## 50158 117 2 30 1 1
## 42441 0 1 0 1 0
## 23657 0 1 0 1 0
## 47732 647 2 31 1 1
## 30165 83 2 30 1 1
## 39466 0 1 0 1 0
## 129 0 1 5 1 0
## 9309 40501 74 47 36 38
## 19029 0 1 0 1 0
## 62043 110 2 30 1 1
## 19374 104 2 30 1 1
## 48164 0 1 0 1 0
## 38602 0 1 0 1 0
## 45567 0 1 0 1 0
## 25998 0 2 8 2 0
## 8097 2105 20 19 12 8
## 56250 0 1 0 1 0
## 21983 0 1 30 1 0
## 47031 0 1 0 1 0
## 22223 0 1 0 1 0
## 12560 0 1 5 1 0
## 3821 0 1 5 1 0
## 46484 0 1 0 1 0
## 16385 3779 17 30 9 8
## 24859 0 1 0 1 0
## 35478 83 2 30 1 1
## 24834 0 1 0 1 0
## 44816 2907 19 29 11 8
## 50037 0 1 0 1 0
## 57070 842 14 224 7 7
## 31616 0 1 0 1 0
## 10860 0 1 0 1 0
## 1625 85 2 30 1 1
## 14913 19365 49 28 24 25
## 12625 3274 17 135 9 8
## 63031 17621 79 141 40 39
## 35057 90 2 30 1 1
## 6705 0 1 0 1 0
## 62480 5575 18 57 9 9
## 59860 0 1 0 1 0
## 42139 0 1 0 1 0
## 43081 517 11 15 6 5
## 59150 0 1 0 1 0
## 36476 518 4 1200 2 2
## 43734 1508 4 31 2 2
## 6615 2303 12 23 7 5
## 10184 6854 25 186 14 11
## 9778 0 1 0 1 0
## 20712 1876248 1747 21 470 1277
## 13640 90 2 30 1 1
## 59389 2155 14 135 6 8
## 5523 100 2 30 1 1
## 29064 175229 238 206 101 137
## 27924 1883 25 117 13 12
## 14337 5715 38 271 20 18
## 48881 86 2 30 1 1
## 50532 0 1 0 1 0
## 55425 92 2 29 1 1
## 11647 0 2 8 2 0
## 2860 5284 27 316 13 14
## 28080 0 1 0 1 0
## 29785 820 5 30 1 4
## 39675 306 11 60 6 5
## 12618 647 2 31 1 1
## 11206 195 4 32 2 2
## 50716 0 1 0 1 0
## 24100 138641 190 19 88 102
## 7677 0 1 0 1 0
## 17044 101 2 31 1 1
## 41904 0 1 0 1 0
## 60500 0 1 5 1 0
## 23255 0 1 0 1 0
## 15255 0 1 0 1 0
## 38270 0 1 0 1 0
## 60237 0 1 0 1 0
## 40274 0 1 0 1 0
## 34028 81 2 30 1 1
## 24954 2512 17 137 8 9
## 24440 0 1 0 1 0
## 4543 89 2 30 1 1
## 8889 0 1 0 1 0
## 26404 0 1 0 1 0
## 1332 0 1 0 1 0
## 64088 3183 16 26 9 7
## 22392 0 1 0 1 0
## 64184 0 1 0 1 0
## 3122 89 2 30 1 1
## 17259 110 2 30 1 1
## 35015 126 6 26 4 2
## 34728 0 1 0 1 0
## 29325 0 1 0 1 0
## 56196 0 1 0 1 0
## 9522 145 2 30 1 1
## 8004 0 1 0 1 0
## 41413 0 1 0 1 0
## 19198 4671 19 101 11 8
## 59999 90 2 30 1 1
## 47586 0 1 0 1 0
## 40981 0 1 0 1 0
## 24452 0 1 0 1 0
## 6731 0 1 0 1 0
## 13483 80 2 31 1 1
## 60650 0 1 0 1 0
## 30117 29488 67 128 28 39
## 21332 0 1 0 1 0
## 37240 0 1 0 1 0
## 31307 5092 23 19 13 10
## 19466 79 2 30 1 1
## 16111 60073 121 324 66 55
## 41387 0 1 0 1 0
## 18679 5603 26 211 14 12
## 50369 0 1 0 1 0
## 41349 0 1 0 1 0
## 29521 0 1 0 1 0
## 13011 0 1 0 1 0
## 7192 0 1 0 1 0
## 45024 105 2 30 1 1
## 65239 187746 336 317 183 153
## 25580 0 1 0 1 0
## 37836 107 2 30 1 1
## 31840 4468 16 15 7 9
## 22142 0 1 0 1 0
## 62123 109 2 31 1 1
## 12840 6669 29 413 11 18
## 25670 5254 23 27 13 10
## 31825 213 4 33 2 2
## 25728 104 2 30 1 1
## 65151 0 1 0 1 0
## 48911 97 2 30 1 1
## 57171 101 2 30 1 1
## 4733 0 1 0 1 0
## 55275 0 1 0 1 0
## 49213 0 1 0 1 0
## 10523 0 1 0 1 0
## 20474 134 3 32 2 1
## 23319 6470 53 616 28 25
## 37131 0 1 0 1 0
## 44281 0 1 1200 1 0
## 34899 0 1 0 1 0
## 25149 97 2 30 1 1
## 10468 0 1 0 1 0
## 968 0 1 0 1 0
## 59957 0 1 0 1 0
## 26999 1413 10 15 6 4
## 4953 96 2 30 1 1
## 16950 0 1 0 1 0
## 43154 0 1 0 1 0
## 10027 0 1 0 1 0
## 45632 1230 10 20 6 4
## 38163 0 1 0 1 0
## 38768 0 1 0 1 0
## 41171 80936 92 125 32 60
## 6703 0 1 0 1 0
## 39721 97 2 30 1 1
## 61720 0 1 0 1 0
## 29920 0 1 0 1 0
## 38698 7146 27 221 15 12
## 32774 6550 26 72 13 13
## 6876 0 1 0 1 0
## 17938 0 1 0 1 0
## 29961 0 1 0 1 0
## 37801 104 2 30 1 1
## 64428 83 2 30 1 1
## 46086 83 2 30 1 1
## 23041 90 2 29 1 1
## 42350 105 2 30 1 1
## 348 1494 15 38 9 6
## 40766 4242 26 329 14 12
## 31159 6384 27 32 13 14
## 18513 0 1 0 1 0
## 14618 200 4 51 2 2
## 39559 0 1 0 1 0
## 5038 97 2 30 1 1
## 48438 94 2 30 1 1
## 1831 0 1 0 1 0
## 35323 0 1 0 1 0
## 37857 4977 23 45 13 10
## 7285 0 1 0 1 0
## 16144 0 1 0 1 0
## 47434 101 2 30 1 1
## 9231 0 1 0 1 0
## 45212 0 1 0 1 0
## 18148 285 2 31 1 1
## 43671 0 1 0 1 0
## 5536 97 2 29 1 1
## 62174 4981 18 131 9 9
## 38306 3802 18 45 11 7
## 40841 208 2 30 1 1
## 60676 0 1 0 1 0
## 10502 0 1 0 1 0
## 37436 5853 22 16 12 10
## 4795 164 2 30 1 1
## 6256 5738496 4959 154 976 3983
## 37313 660 2 30 1 1
## 50684 0 1 0 1 0
## 34386 324 12 195 7 5
## 52359 145 2 33 1 1
## 9890 128 2 30 1 1
## 65303 5786940 5053 78 1161 3892
## 20505 92 2 30 1 1
## 31820 0 1 4 1 0
## 40972 0 1 0 1 0
## 37810 0 1 30 1 0
## 54407 0 1 0 1 0
## 28776 0 1 0 1 0
## 30685 122 9 15 7 2
## 39729 204 8 141 5 3
## 64756 7881327 6752 120 1393 5359
## 2159 22444 36 318 19 17
## 43849 90 2 30 1 1
## 43733 1498 4 31 2 2
## 55828 6134 24 81 14 10
## 39333 99 2 30 1 1
## 42519 0 1 0 1 0
## 21231 1249 17 16 8 9
## 4956 91 2 30 1 1
## 42738 74 4 35 3 1
## 12737 3436 16 26 9 7
## 17356 0 1 0 1 0
## 21136 179 4 41 2 2
## 56176 83 2 31 1 1
## 47194 105 2 31 1 1
## 42498 0 1 0 1 0
## 47156 99 2 30 1 1
## 25443 90 2 30 1 1
## 50638 246 9 35 5 4
## 36705 80 2 31 1 1
## 11590 0 1 0 1 0
## 48757 280 9 32 5 4
## 10712 353 4 33 2 2
## 29445 4466 16 15 7 9
## 54553 0 1 0 1 0
## 56357 656 2 30 1 1
## 29759 589 12 48 6 6
## 40385 5099 28 46 16 12
## 60274 0 1 0 1 0
## 38239 0 1 0 1 0
## 44772 0 1 0 1 0
## 46155 2566 14 135 7 7
## 60110 60 2 15 1 1
## 44915 25982 46 323 19 27
## 22420 0 1 0 1 0
## 46789 97 2 29 1 1
## 62768 0 1 0 1 0
## 41684 0 3 1200 3 0
## 11564 0 1 0 1 0
## 38847 121 2 30 1 1
## 21965 62 2 31 1 1
## 37530 0 1 0 1 0
## 16743 84 2 30 1 1
## 41515 2744 35 425 11 24
## 543 83 2 30 1 1
## 3988 90 2 30 1 1
## 57777 74 4 0 3 1
## 25542 0 1 0 1 0
## 58279 0 1 0 1 0
## 35155 354 12 28 7 5
## 5340 8730 28 525 12 16
## 27236 1614 8 120 4 4
## 44362 0 1 0 1 0
## 32797 109 2 30 1 1
## 52902 11175 107 31 51 56
## 42171 0 1 0 1 0
## 3124 83 2 30 1 1
## 12614 82 2 31 1 1
## 65506 3896 19 272 11 8
## 18576 0 1 0 1 0
## 53355 95 2 30 1 1
## 42124 0 1 0 1 0
## 38205 91 2 31 1 1
## 31562 0 1 0 1 0
## 56730 0 1 0 1 0
## 1160 90 2 30 1 1
## 46391 0 1 0 1 0
## 7486 0 1 0 1 0
## 54079 186 7 93 4 3
## 5158 0 1 0 1 0
## 35908 0 1 0 1 0
## 37132 0 1 0 1 0
## 2820 0 1 0 1 0
## 13283 79 2 29 1 1
## 23624 0 1 0 1 0
## 56373 1562 46 194 23 23
## 57240 0 1 5 1 0
## 59170 0 1 0 1 0
## 21379 0 1 0 1 0
## 5372 86 2 30 1 1
## 33558 2907 19 30 11 8
## 5211 0 1 0 1 0
## 48266 84 2 31 1 1
## 5001 349795 470 143 214 256
## 53703 5430 18 326 9 9
## 2760 0 1 0 1 0
## 42501 0 1 0 1 0
## 13955 0 1 0 1 0
## 38479 3606 13 46 7 6
## 2007 0 1 0 1 0
## 826 348 9 28 4 5
## 52165 0 1 0 1 0
## 55552 94 2 30 1 1
## 51633 173 2 1203 1 1
## 37398 84 2 31 1 1
## 45954 0 1 0 1 0
## 57835 0 1 0 1 0
## 5349 0 1 5 1 0
## 22697 60 2 15 1 1
## 56202 0 1 0 1 0
## 13350 24296 108 265 55 53
## 8258 205 2 29 1 1
## 48253 140 4 11 2 2
## 20797 0 1 0 1 0
## 248 0 1 0 1 0
## 26891 0 1 0 1 0
## 63302 97 2 29 1 1
## 4057 1143 24 119 11 13
## 29021 13979742 12752 54 3515 9237
## 26932 0 1 0 1 0
## 22840 0 1 0 1 0
## 6230 127 2 30 1 1
## 41527 0 1 0 1 0
## 19558 540 12 1316 6 6
## 30532 0 1 0 1 0
## 30065 126 6 30 4 2
## 44966 7731 28 27 16 12
## 49462 4075 15 28 9 6
## 31542 0 1 0 1 0
## 20933 4575 21 256 11 10
## 12377 0 1 0 1 0
## 16459 0 1 0 1 0
## 16917 70 3 0 2 1
## 48849 146 2 31 1 1
## 58835 0 1 0 1 0
## 33042 0 1 0 1 0
## 40843 2907 19 30 11 8
## 7860 0 1 0 1 0
## 5928 115 2 30 1 1
## 61501 7109 32 287 16 16
## 63685 4512375 4382 213 1198 3184
## 14451 0 1 0 1 0
## 8193 126 7 21 5 2
## 63312 60 2 4 1 1
## 40372 0 1 0 1 0
## 59106 0 1 0 1 0
## 57863 0 1 0 1 0
## 5425 0 1 0 1 0
## 18274 5349 24 138 12 12
## 9576 0 2 8 2 0
## 45003 0 1 4 1 0
## 61281 0 1 0 1 0
## 55213 0 1 0 1 0
## 42753 16102 146 556 65 81
## 5847 0 1 0 1 0
## 10623 0 1 0 1 0
## 7664 0 1 0 1 0
## 3982 158 2 30 1 1
## 62328 2660 39 47 21 18
## 7794 0 1 0 1 0
## 54274 83 2 31 1 1
## 11335 3221 12 16 6 6
## 22825 0 1 0 1 0
## Persistent homology of the IntFirewallData_one_hot_1000_df dataset
# Compute persistent homology for IntFirewallData_one_hot_1000_df; only the
# data is passed, so every other calculate_homology() argument keeps its default.
# NOTE(review): calculate_homology()/plot_barcode()/plot_persist() look like
# TDAstats functions -- confirm that package is attached earlier in the file.
phom_IntFirewallData_one_hot_1000_df <- calculate_homology(IntFirewallData_one_hot_1000_df)
# Plot the barcode of the homology result computed above.
plot_barcode(phom_IntFirewallData_one_hot_1000_df)

# Plot the persistence diagram of the same homology result.
plot_persist(phom_IntFirewallData_one_hot_1000_df)

##### ---------------- MAPPER ALGORITHM ----------------
# Prepare the Adult dataset for the 1-D Mapper algorithm.
# Column 15 of `adult` is appended to the one-hot encoded frame
# (presumably the income label -- TODO confirm against the data loading code).
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Column subsets of the combined frame, selected by position.
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62, 63, 64, 65, 66)]
adult.one_hot_df4 <- adult.one_hot_df1[, -c(109, 110)]

## Two filter functions: PCA and KDE
# Linear PCA filter: centre and scale every column of the one-hot frame, then
# project the same rows onto the principal components.
# `scale.` is spelled out in full; the previous `scale = TRUE` only worked
# through partial argument matching.
b <- prcomp(adult.one_hot_df, center = TRUE, scale. = TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))
# Kernel density filter: uses the first 4 of the 8 columns selected into
# adult.one_hot_df3, with a 4 x 4 identity bandwidth matrix, and evaluates the
# density at the observations themselves (eval.points is the same data).
# NOTE(review): kde() is presumably ks::kde -- confirm the package is attached.
filter.kde <- kde(
  adult.one_hot_df3[, 1:4],
  H = diag(1, nrow = 4),
  eval.points = adult.one_hot_df3[, 1:4]
)$estimate
### Adult Mapper, PCA filter (PC1): 5 intervals, 50% overlap, 5 bins
# NOTE(review): dist(adult.one_hot_df) is recomputed for every Mapper run in
# this file; computing it once and reusing it would avoid repeated work.
m_adult_5.50.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 50,
  num_bins_when_clustering = 5
)
# graph_from_adjacency_matrix() replaces graph.adjacency(), which is
# deprecated as of igraph 2.0.0.
g_adult_5.50.5 <- graph_from_adjacency_matrix(
  m_adult_5.50.5$adjacency,
  mode = "undirected"
)
## Warning: `graph.adjacency()` was deprecated in igraph 2.0.0.
## ℹ Please use `graph_from_adjacency_matrix()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# layout_nicely() replaces layout.auto(), which is deprecated as of igraph 2.0.0.
plot(g_adult_5.50.5, layout = layout_nicely(g_adult_5.50.5))
## Warning: `layout.auto()` was deprecated in igraph 2.0.0.
## ℹ Please use `layout_nicely()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# str() prints the structure itself and returns NULL invisibly, so it is
# called directly; wrapping it in head() only printed an extra NULL.
str(m_adult_5.50.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_adult_5.50.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_adult_5.50.5$points_in_vertex)
## List of 5
## $ : int [1:4917] 8 10 12 21 26 46 64 69 73 87 ...
## $ : int [1:12206] 2 8 10 11 12 15 21 24 26 28 ...
## $ : int [1:13240] 1 2 4 5 6 9 11 15 16 19 ...
## $ : int [1:16700] 1 3 4 5 6 9 13 14 16 17 ...
## $ : int [1:14404] 3 7 13 14 17 18 22 25 27 32 ...
# Colour each vertex by its filter level (red -> green -> light blue) and
# size it by the log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.50.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Non-deprecated igraph constructors/layouts (igraph >= 2.0.0 warns on the
# old graph.adjacency()/layout.auto() names).
g_adult_5.50.5 <- graph_from_adjacency_matrix(
  m_adult_5.50.5$adjacency,
  mode = "undirected"
)
vertex_size <- lengths(m_adult_5.50.5$points_in_vertex)
plot(
  g_adult_5.50.5,
  layout = layout_nicely(g_adult_5.50.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

## Observation IDs held by each Mapper vertex: first as one-element list
## slices, then flattened to plain index vectors.
m_adult_5.50.5.n1 <- m_adult_5.50.5[["points_in_vertex"]][1]
m_adult_5.50.5.n2 <- m_adult_5.50.5[["points_in_vertex"]][2]
m_adult_5.50.5.n3 <- m_adult_5.50.5[["points_in_vertex"]][3]
m_adult_5.50.5.n4 <- m_adult_5.50.5[["points_in_vertex"]][4]
m_adult_5.50.5.n5 <- m_adult_5.50.5[["points_in_vertex"]][5]
m_adult_5.50.5.n1.vec <- unlist(m_adult_5.50.5.n1, use.names = FALSE)
m_adult_5.50.5.n2.vec <- unlist(m_adult_5.50.5.n2, use.names = FALSE)
m_adult_5.50.5.n3.vec <- unlist(m_adult_5.50.5.n3, use.names = FALSE)
m_adult_5.50.5.n4.vec <- unlist(m_adult_5.50.5.n4, use.names = FALSE)
m_adult_5.50.5.n5.vec <- unlist(m_adult_5.50.5.n5, use.names = FALSE)
## Row-subset adult.one_hot_df4 with each vertex's IDs.
tda.m_adult_5.50.5.n1.vec <- adult.one_hot_df4[m_adult_5.50.5.n1.vec, ]
tda.m_adult_5.50.5.n2.vec <- adult.one_hot_df4[m_adult_5.50.5.n2.vec, ]
tda.m_adult_5.50.5.n3.vec <- adult.one_hot_df4[m_adult_5.50.5.n3.vec, ]
tda.m_adult_5.50.5.n4.vec <- adult.one_hot_df4[m_adult_5.50.5.n4.vec, ]
tda.m_adult_5.50.5.n5.vec <- adult.one_hot_df4[m_adult_5.50.5.n5.vec, ]
## Adult Mapper, PCA filter (PC1): 5 intervals, 40% overlap, 5 bins
m_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5
)
# graph_from_adjacency_matrix()/layout_nicely() replace the deprecated
# graph.adjacency()/layout.auto() (igraph >= 2.0.0).
g_adult_5.40.5 <- graph_from_adjacency_matrix(
  m_adult_5.40.5$adjacency,
  mode = "undirected"
)
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5))

# str() prints the structure itself and returns NULL invisibly, so it is
# called directly; wrapping it in head() only printed an extra NULL.
str(m_adult_5.40.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_adult_5.40.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_adult_5.40.5$points_in_vertex)
## List of 5
## $ : int [1:3373] 8 10 12 21 26 46 64 69 73 95 ...
## $ : int [1:10276] 2 8 10 11 12 15 21 24 26 28 ...
## $ : int [1:11563] 1 2 4 6 9 16 19 20 23 24 ...
## $ : int [1:14818] 1 3 4 5 6 9 13 14 16 17 ...
## $ : int [1:12081] 7 13 14 18 22 25 27 32 36 37 ...
# Colour each vertex by its filter level (red -> green -> light blue) and
# size it by the log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.40.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
g_adult_5.40.5 <- graph_from_adjacency_matrix(
  m_adult_5.40.5$adjacency,
  mode = "undirected"
)
vertex_size <- lengths(m_adult_5.40.5$points_in_vertex)
# Plot the 40%-overlap graph itself: the earlier version drew
# g_adult_5.50.5 with this run's layout, sizes and colours.
plot(
  g_adult_5.40.5,
  layout = layout_nicely(g_adult_5.40.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

## Observation IDs held by each Mapper vertex: first as one-element list
## slices, then flattened to plain index vectors.
m_adult_5.40.5.n1 <- m_adult_5.40.5[["points_in_vertex"]][1]
m_adult_5.40.5.n2 <- m_adult_5.40.5[["points_in_vertex"]][2]
m_adult_5.40.5.n3 <- m_adult_5.40.5[["points_in_vertex"]][3]
m_adult_5.40.5.n4 <- m_adult_5.40.5[["points_in_vertex"]][4]
m_adult_5.40.5.n5 <- m_adult_5.40.5[["points_in_vertex"]][5]
m_adult_5.40.5.n1.vec <- unlist(m_adult_5.40.5.n1, use.names = FALSE)
m_adult_5.40.5.n2.vec <- unlist(m_adult_5.40.5.n2, use.names = FALSE)
m_adult_5.40.5.n3.vec <- unlist(m_adult_5.40.5.n3, use.names = FALSE)
m_adult_5.40.5.n4.vec <- unlist(m_adult_5.40.5.n4, use.names = FALSE)
m_adult_5.40.5.n5.vec <- unlist(m_adult_5.40.5.n5, use.names = FALSE)
## Row-subset adult.one_hot_df4 with each vertex's IDs.
tda.m_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_adult_5.40.5.n1.vec, ]
tda.m_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_adult_5.40.5.n2.vec, ]
tda.m_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_adult_5.40.5.n3.vec, ]
tda.m_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_adult_5.40.5.n4.vec, ]
tda.m_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_adult_5.40.5.n5.vec, ]
## Adult Mapper, PCA filter (PC1): 5 intervals, 30% overlap, 5 bins
m_adult_5.30.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 30,
  num_bins_when_clustering = 5
)
# Build the graph from THIS run's adjacency matrix; the earlier version
# used m_adult_5.40.5$adjacency, so the first plot showed the 40% graph.
# graph_from_adjacency_matrix()/layout_nicely() also replace the deprecated
# graph.adjacency()/layout.auto() (igraph >= 2.0.0).
g_adult_5.30.5 <- graph_from_adjacency_matrix(
  m_adult_5.30.5$adjacency,
  mode = "undirected"
)
plot(g_adult_5.30.5, layout = layout_nicely(g_adult_5.30.5))

# str() prints the structure itself and returns NULL invisibly, so it is
# called directly; wrapping it in head() only printed an extra NULL.
str(m_adult_5.30.5$level_of_vertex)
## int [1:6] 1 1 2 3 4 5
str(m_adult_5.30.5$vertices_in_level)
## List of 5
## $ : num [1:2] 1 2
## $ : num 3
## $ : num 4
## $ : num 5
## $ : num 6
str(m_adult_5.30.5$points_in_vertex)
## List of 6
## $ : int [1:2254] 8 21 26 64 69 73 97 101 102 112 ...
## $ : int 27366
## $ : int [1:8644] 2 8 10 11 12 15 21 26 28 39 ...
## $ : int [1:10534] 2 4 6 9 16 19 20 23 24 28 ...
## $ : int [1:13627] 1 3 4 5 6 14 16 17 18 25 ...
## $ : int [1:9944] 7 13 22 25 32 36 38 44 52 62 ...
# Colour each vertex by its filter level (red -> green -> light blue) and
# size it by the log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_adult_5.30.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.30.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
g_adult_5.30.5 <- graph_from_adjacency_matrix(
  m_adult_5.30.5$adjacency,
  mode = "undirected"
)
vertex_size <- lengths(m_adult_5.30.5$points_in_vertex)
# Plot the 30%-overlap graph itself: the earlier version drew the
# five-vertex g_adult_5.50.5 with this run's six-vertex layout, sizes and
# colours.
# NOTE(review): a vertex holding a single point gets size 0 here because
# log(1) == 0.
plot(
  g_adult_5.30.5,
  layout = layout_nicely(g_adult_5.30.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

## Extract the observation IDs of each Mapper output vertex.
## The recorded str() output for this run lists six vertices, so all six are
## extracted; previously the sixth vertex was silently left out.
m_adult_5.30.5.n1 <- m_adult_5.30.5$points_in_vertex[1]
m_adult_5.30.5.n1.vec <- as.vector(unlist(m_adult_5.30.5.n1))
m_adult_5.30.5.n2 <- m_adult_5.30.5$points_in_vertex[2]
m_adult_5.30.5.n2.vec <- as.vector(unlist(m_adult_5.30.5.n2))
m_adult_5.30.5.n3 <- m_adult_5.30.5$points_in_vertex[3]
m_adult_5.30.5.n3.vec <- as.vector(unlist(m_adult_5.30.5.n3))
m_adult_5.30.5.n4 <- m_adult_5.30.5$points_in_vertex[4]
m_adult_5.30.5.n4.vec <- as.vector(unlist(m_adult_5.30.5.n4))
m_adult_5.30.5.n5 <- m_adult_5.30.5$points_in_vertex[5]
m_adult_5.30.5.n5.vec <- as.vector(unlist(m_adult_5.30.5.n5))
m_adult_5.30.5.n6 <- m_adult_5.30.5$points_in_vertex[6]
m_adult_5.30.5.n6.vec <- as.vector(unlist(m_adult_5.30.5.n6))
## Map the IDs of each Mapper vertex to the rows of adult.one_hot_df4.
## NOTE(review): in the recorded run vertex 2 holds a single observation, so
## its frame has one row and is too small to train a classifier on.
tda.m_adult_5.30.5.n1.vec <- adult.one_hot_df4[m_adult_5.30.5.n1.vec, ]
tda.m_adult_5.30.5.n2.vec <- adult.one_hot_df4[m_adult_5.30.5.n2.vec, ]
tda.m_adult_5.30.5.n3.vec <- adult.one_hot_df4[m_adult_5.30.5.n3.vec, ]
tda.m_adult_5.30.5.n4.vec <- adult.one_hot_df4[m_adult_5.30.5.n4.vec, ]
tda.m_adult_5.30.5.n5.vec <- adult.one_hot_df4[m_adult_5.30.5.n5.vec, ]
tda.m_adult_5.30.5.n6.vec <- adult.one_hot_df4[m_adult_5.30.5.n6.vec, ]
## Adult Mapper, KDE filter: 5 intervals, 50% overlap, 5 bins
m_kde_adult_5.50.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 50,
  num_bins_when_clustering = 5
)
# graph_from_adjacency_matrix()/layout_nicely() replace the deprecated
# graph.adjacency()/layout.auto() (igraph >= 2.0.0).
g_kde_adult_5.50.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.50.5$adjacency,
  mode = "undirected"
)
plot(g_kde_adult_5.50.5, layout = layout_nicely(g_kde_adult_5.50.5))

# str() prints the structure itself and returns NULL invisibly, so it is
# called directly; wrapping it in head() only printed an extra NULL.
str(m_kde_adult_5.50.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.50.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_kde_adult_5.50.5$points_in_vertex)
## List of 5
## $ : int [1:13387] 2 4 5 6 7 9 16 19 20 21 ...
## $ : int [1:12638] 1 2 6 8 9 13 20 24 25 26 ...
## $ : int [1:11634] 1 8 10 11 12 13 14 27 28 30 ...
## $ : int [1:10038] 3 10 11 12 14 15 27 30 32 34 ...
## $ : int [1:7540] 3 15 17 18 37 39 56 59 60 65 ...
# Colour each vertex by its filter level (red -> green -> light blue) and
# size it by the log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_kde_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.50.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Non-deprecated igraph constructors/layouts (igraph >= 2.0.0 warns on the
# old graph.adjacency()/layout.auto() names).
g_kde_adult_5.50.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.50.5$adjacency,
  mode = "undirected"
)
vertex_size <- lengths(m_kde_adult_5.50.5$points_in_vertex)
plot(
  g_kde_adult_5.50.5,
  layout = layout_nicely(g_kde_adult_5.50.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

## Observation IDs held by each Mapper vertex: first as one-element list
## slices, then flattened to plain index vectors.
m_kde_adult_5.50.5.n1 <- m_kde_adult_5.50.5[["points_in_vertex"]][1]
m_kde_adult_5.50.5.n2 <- m_kde_adult_5.50.5[["points_in_vertex"]][2]
m_kde_adult_5.50.5.n3 <- m_kde_adult_5.50.5[["points_in_vertex"]][3]
m_kde_adult_5.50.5.n4 <- m_kde_adult_5.50.5[["points_in_vertex"]][4]
m_kde_adult_5.50.5.n5 <- m_kde_adult_5.50.5[["points_in_vertex"]][5]
m_kde_adult_5.50.5.n1.vec <- unlist(m_kde_adult_5.50.5.n1, use.names = FALSE)
m_kde_adult_5.50.5.n2.vec <- unlist(m_kde_adult_5.50.5.n2, use.names = FALSE)
m_kde_adult_5.50.5.n3.vec <- unlist(m_kde_adult_5.50.5.n3, use.names = FALSE)
m_kde_adult_5.50.5.n4.vec <- unlist(m_kde_adult_5.50.5.n4, use.names = FALSE)
m_kde_adult_5.50.5.n5.vec <- unlist(m_kde_adult_5.50.5.n5, use.names = FALSE)
## Row-subset adult.one_hot_df4 with each vertex's IDs.
tda.m_kde_adult_5.50.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n1.vec, ]
tda.m_kde_adult_5.50.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n2.vec, ]
tda.m_kde_adult_5.50.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n3.vec, ]
tda.m_kde_adult_5.50.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n4.vec, ]
tda.m_kde_adult_5.50.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n5.vec, ]
## Adult Mapper, KDE filter: 5 intervals, 40% overlap, 5 bins
m_kde_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5
)
# graph_from_adjacency_matrix()/layout_nicely() replace the deprecated
# graph.adjacency()/layout.auto() (igraph >= 2.0.0).
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.40.5$adjacency,
  mode = "undirected"
)
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5))

# str() prints the structure itself and returns NULL invisibly, so it is
# called directly; wrapping it in head() only printed an extra NULL.
str(m_kde_adult_5.40.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.40.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_kde_adult_5.40.5$points_in_vertex)
## List of 5
## $ : int [1:11838] 4 5 6 7 9 16 19 20 21 22 ...
## $ : int [1:11203] 1 2 6 9 13 20 24 25 26 29 ...
## $ : int [1:10351] 1 8 10 11 12 14 27 28 30 31 ...
## $ : int [1:8741] 3 10 11 12 14 15 27 30 32 34 ...
## $ : int [1:6628] 3 15 17 18 37 39 59 60 65 66 ...
# Colour each vertex by its filter level (red -> green -> light blue) and
# size it by the log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_kde_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.40.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Non-deprecated igraph constructors/layouts (igraph >= 2.0.0 warns on the
# old graph.adjacency()/layout.auto() names).
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.40.5$adjacency,
  mode = "undirected"
)
vertex_size <- lengths(m_kde_adult_5.40.5$points_in_vertex)
plot(
  g_kde_adult_5.40.5,
  layout = layout_nicely(g_kde_adult_5.40.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

## Observation IDs held by each Mapper vertex: first as one-element list
## slices, then flattened to plain index vectors.
m_kde_adult_5.40.5.n1 <- m_kde_adult_5.40.5[["points_in_vertex"]][1]
m_kde_adult_5.40.5.n2 <- m_kde_adult_5.40.5[["points_in_vertex"]][2]
m_kde_adult_5.40.5.n3 <- m_kde_adult_5.40.5[["points_in_vertex"]][3]
m_kde_adult_5.40.5.n4 <- m_kde_adult_5.40.5[["points_in_vertex"]][4]
m_kde_adult_5.40.5.n5 <- m_kde_adult_5.40.5[["points_in_vertex"]][5]
m_kde_adult_5.40.5.n1.vec <- unlist(m_kde_adult_5.40.5.n1, use.names = FALSE)
m_kde_adult_5.40.5.n2.vec <- unlist(m_kde_adult_5.40.5.n2, use.names = FALSE)
m_kde_adult_5.40.5.n3.vec <- unlist(m_kde_adult_5.40.5.n3, use.names = FALSE)
m_kde_adult_5.40.5.n4.vec <- unlist(m_kde_adult_5.40.5.n4, use.names = FALSE)
m_kde_adult_5.40.5.n5.vec <- unlist(m_kde_adult_5.40.5.n5, use.names = FALSE)
## Row-subset adult.one_hot_df4 with each vertex's IDs.
tda.m_kde_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n1.vec, ]
tda.m_kde_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n2.vec, ]
tda.m_kde_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n3.vec, ]
tda.m_kde_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n4.vec, ]
tda.m_kde_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n5.vec, ]
## Adult Mapper, KDE filter: 5 intervals, 30% overlap, 5 bins
# percent_overlap is 30 to match this run's name and heading; it was 50,
# which merely repeated the m_kde_adult_5.50.5 run.
# NOTE(review): the str() output recorded below this block was produced
# with the old value (it is identical to the 50% run) and must be
# regenerated.
m_kde_adult_5.30.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 30,
  num_bins_when_clustering = 5
)
# graph_from_adjacency_matrix()/layout_nicely() replace the deprecated
# graph.adjacency()/layout.auto() (igraph >= 2.0.0).
g_kde_adult_5.30.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.30.5$adjacency,
  mode = "undirected"
)
plot(g_kde_adult_5.30.5, layout = layout_nicely(g_kde_adult_5.30.5))

# str() prints the structure itself and returns NULL invisibly, so it is
# called directly; wrapping it in head() only printed an extra NULL.
str(m_kde_adult_5.30.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.30.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_kde_adult_5.30.5$points_in_vertex)
## List of 5
## $ : int [1:13387] 2 4 5 6 7 9 16 19 20 21 ...
## $ : int [1:12638] 1 2 6 8 9 13 20 24 25 26 ...
## $ : int [1:11634] 1 8 10 11 12 13 14 27 28 30 ...
## $ : int [1:10038] 3 10 11 12 14 15 27 30 32 34 ...
## $ : int [1:7540] 3 15 17 18 37 39 56 59 60 65 ...
# Colour each vertex by its filter level (red -> green -> light blue) and
# size it by the log of the number of points it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_kde_adult_5.30.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.30.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Non-deprecated igraph constructors/layouts (igraph >= 2.0.0 warns on the
# old graph.adjacency()/layout.auto() names).
g_kde_adult_5.30.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.30.5$adjacency,
  mode = "undirected"
)
vertex_size <- lengths(m_kde_adult_5.30.5$points_in_vertex)
plot(
  g_kde_adult_5.30.5,
  layout = layout_nicely(g_kde_adult_5.30.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

## Observation IDs held by each Mapper vertex: first as one-element list
## slices, then flattened to plain index vectors.
m_kde_adult_5.30.5.n1 <- m_kde_adult_5.30.5[["points_in_vertex"]][1]
m_kde_adult_5.30.5.n2 <- m_kde_adult_5.30.5[["points_in_vertex"]][2]
m_kde_adult_5.30.5.n3 <- m_kde_adult_5.30.5[["points_in_vertex"]][3]
m_kde_adult_5.30.5.n4 <- m_kde_adult_5.30.5[["points_in_vertex"]][4]
m_kde_adult_5.30.5.n5 <- m_kde_adult_5.30.5[["points_in_vertex"]][5]
m_kde_adult_5.30.5.n1.vec <- unlist(m_kde_adult_5.30.5.n1, use.names = FALSE)
m_kde_adult_5.30.5.n2.vec <- unlist(m_kde_adult_5.30.5.n2, use.names = FALSE)
m_kde_adult_5.30.5.n3.vec <- unlist(m_kde_adult_5.30.5.n3, use.names = FALSE)
m_kde_adult_5.30.5.n4.vec <- unlist(m_kde_adult_5.30.5.n4, use.names = FALSE)
m_kde_adult_5.30.5.n5.vec <- unlist(m_kde_adult_5.30.5.n5, use.names = FALSE)
## Row-subset adult.one_hot_df4 with each vertex's IDs.
tda.m_kde_adult_5.30.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.30.5.n1.vec, ]
tda.m_kde_adult_5.30.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.30.5.n2.vec, ]
tda.m_kde_adult_5.30.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.30.5.n3.vec, ]
tda.m_kde_adult_5.30.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.30.5.n4.vec, ]
tda.m_kde_adult_5.30.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.30.5.n5.vec, ]
library(caret)
# Single 70/30 partition on the outcome column; returned as a matrix of
# row indices rather than a list.
# NOTE(review): no set.seed() call is visible near this split; confirm one
# exists earlier in the file, otherwise the partition is not reproducible.
trainIndex <- createDataPartition(
  adult.one_hot_df4$adult_df1,
  p = 0.7,
  list = FALSE,
  times = 1
)
head(trainIndex)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 8
# Split the full frame into training rows and the held-out test rows.
adult.one_hot_df4Train <- adult.one_hot_df4[trainIndex, ]
adult.one_hot_df4Test <- adult.one_hot_df4[-trainIndex, ]
# Train control shared by all models: 3-fold cross-validation
# (the old inline comment said 10-fold, but number = 3).
fitControl <- trainControl(
  method = "cv",
  number = 3
)
# Non-TDA-assisted baseline
# Random forest. `importance` and `ntree` are the randomForest argument
# names; the earlier spellings `Importance` / `n.tree` were silently ignored,
# so the fit used the default 500 trees with importance disabled (the
# recorded summary shows err.rate of length 1500 and an empty importanceSD).
# NOTE(review): output recorded below this call predates the fix.
adultRfFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  importance = TRUE,
  ntree = 100,
  method = "rf",
  trControl = fitControl,
  metric = "Accuracy"
)
adultRfFit
## Random Forest
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15196, 15195
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8070022 0.2956849
## 55 0.8587285 0.5899910
## 108 0.8546922 0.5798266
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
adultRfFit$resample
## Accuracy Kappa Resample
## 1 0.8577257 0.5823456 Fold1
## 2 0.8602264 0.5962355 Fold3
## 3 0.8582335 0.5913918 Fold2
ad_rf_fit_re<-adultRfFit$resample[1]
summary(adultRfFit)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 22793 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 45586 matrix numeric
## oob.times 22793 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 22793 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (adultRfFit)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V3 100.000
## V6.Married.civ.spouse 88.358
## V11 76.409
## V1 76.084
## V5 65.152
## V13 44.554
## V8.Husband 31.880
## V12 23.159
## V7.Exec.managerial 10.006
## V7.Prof.specialty 7.717
## V2.Private 7.037
## V8.Wife 6.986
## V7.Craft.repair 5.798
## V2.Self.emp.not.inc 5.772
## V7.Sales 5.754
## V6.Never.married 5.735
## V2.Self.emp.inc 4.338
## V2.Local.gov 4.023
## V7.Adm.clerical 3.933
## V7.Transport.moving 3.863
# Score the held-out test rows with the baseline random forest.
predictions <- predict(
  adultRfFit,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of predicted vs. observed classes on the test rows.
rf_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
rf_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6920 905
## >50K 496 1447
##
## Accuracy : 0.8566
## 95% CI : (0.8495, 0.8635)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5829
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9331
## Specificity : 0.6152
## Pos Pred Value : 0.8843
## Neg Pred Value : 0.7447
## Prevalence : 0.7592
## Detection Rate : 0.7084
## Detection Prevalence : 0.8011
## Balanced Accuracy : 0.7742
##
## 'Positive' Class : <=50K
##
rf_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.565725e-01 5.829494e-01 8.494641e-01 8.634675e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.511642e-125 1.147941e-27
rf_cf_ov_acc<-rf_cf$overall[1]
rf_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9331176 0.6152211 0.8843450
## Neg Pred Value Precision Recall
## 0.7447247 0.8843450 0.9331176
## F1 Prevalence Detection Rate
## 0.9080769 0.7592138 0.7084357
## Detection Prevalence Balanced Accuracy
## 0.8010852 0.7741693
rf_cf_pre_rec_f1<-rf_cf$byClass[5:7]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Random forest fitted on the rows of Mapper vertex 1 (PCA filter,
# 5 intervals, 50% overlap). `importance` and `ntree` are the randomForest
# argument names; the earlier spellings `Importance` / `n.tree` were silently
# ignored (default 500 trees, importance disabled).
# NOTE(review): the vertex frame is a row subset of the unsplit
# adult.one_hot_df4, so it can contain rows that are also in
# adult.one_hot_df4Test; evaluating this model on the test set may leak.
Adult_TDA_PC_5.50.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  importance = TRUE,
  ntree = 100,
  method = "rf",
  trControl = fitControl,
  metric = "Accuracy"
)
Adult_TDA_PC_5.50.5_n1_RfFit0
## Random Forest
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3278, 3277, 3279
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9733579 0.0000000
## 55 0.9725445 0.1097594
## 108 0.9719342 0.1154372
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
Adult_TDA_PC_5.50.5_n1_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.9737485 0 Fold3
## 2 0.9731707 0 Fold2
## 3 0.9731544 0 Fold1
ad_tda_pc_5.50.5_n1_rf_fit0_re<-Adult_TDA_PC_5.50.5_n1_RfFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n1_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 4917 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 9834 matrix numeric
## oob.times 4917 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 4917 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_PC_5.50.5_n1_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V13 100.000
## V2.Self.emp.not.inc 94.113
## V5 68.061
## V1 66.653
## V2.Private 65.355
## V4.Doctorate 35.644
## V11 31.734
## V2.Self.emp.inc 26.326
## V3 24.206
## V7.Farming.fishing 22.429
## V7.Prof.specialty 16.876
## V12 15.995
## V4.Prof.school 11.432
## V4.Some.college 10.799
## V4.Masters 9.912
## V4.HS.grad 9.565
## V14.South 4.917
## V7.Craft.repair 4.520
## V7.Sales 4.446
## V4.Bachelors 3.956
# Score the held-out test rows with the vertex-1 random forest.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of predicted vs. observed classes on the test rows.
ad_tda_pc_5.50.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
ad_tda_pc_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n1_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_rf_cf0$overall[1]
ad_tda_pc_5.50.5_n1_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.0000000 1.0000000 NaN
## Neg Pred Value Precision Recall
## 0.2407862 NA 0.0000000
## F1 Prevalence Detection Rate
## NA 0.7592138 0.0000000
## Detection Prevalence Balanced Accuracy
## 0.0000000 0.5000000
ad_tda_pc_5.50.5_n1_rf_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_rf_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Per-fold accuracy difference (baseline RF minus vertex-1 RF), paired by
# fold label. caret does not return `$resample` rows in a fixed fold order
# (the recorded output shows Fold1/Fold3/Fold2 for the baseline and
# Fold3/Fold2/Fold1 for vertex 1), so subtracting row-by-row paired
# different folds. The result keeps the original shape: a one-column
# data frame named `Accuracy`.
# NOTE(review): the two models were cross-validated on different row sets,
# so even label-aligned folds are only nominally paired.
diff_tda_pca_5.50.5_rf_n1_3_fold <- local({
  accuracy_by_fold <- function(fit) {
    fit$resample$Accuracy[order(fit$resample$Resample)]
  }
  data.frame(
    Accuracy = accuracy_by_fold(adultRfFit) -
      accuracy_by_fold(Adult_TDA_PC_5.50.5_n1_RfFit0)
  )
})
diff_tda_pca_5.50.5_rf_n1_3_fold
## Accuracy
## 1 -0.1160228
## 2 -0.1129444
## 3 -0.1149208
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n1_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n1_3_fold$probRight
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0.991
##
## $winRope
## [1] 0.009
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Arguments after the difference matrix are 0.1 and the ROPE bounds
# -0.01 / 0.01 (the same bounds passed to the sign and signed-rank tests).
# NOTE(review): if the 0.1 argument is the cross-validation correlation rho
# (as in the BayesianTestsML reference implementation), 3-fold CV would
# call for 1/3 rather than 0.1; confirm against the function definition.
bct_tda_pca_5.50.5_rf.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n1_3_fold
## $left
## [1] 0.9999506
##
## $rope
## [1] 1.457494e-05
##
## $right
## [1] 3.480343e-05
# Rope Plot
plot(rope(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold)
## t = -127.29, df = 2, p-value = 6.171e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1185040 -0.1107546
## sample estimates:
## mean of x
## -0.1146293
### Test set diff
diff_tda_pca_5.50.5_rf.n1_test<-(rf_cf_ov_acc-ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc)
diff_tda_pca_5.50.5_rf.n1_test
## Accuracy
## 0.6157862
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n1_test_odds.left<-bst_tda_pca_5.50.5_rf.n1_test$probLeft/bst_tda_pca_5.50.5_rf.n1_test$probRight
bst_tda_pca_5.50.5_rf.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1570333
##
## $winRight
## [1] 0.8429667
# Bayesian Correlated Test
bct_tda_pca_5.50.5_rf.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n1_test)))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n1_test)) #bf_tda_pca_5.50.5_rf.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n1_test))
##Node2
# Random forest fitted on the rows of Mapper vertex 2 (PCA filter,
# 5 intervals, 50% overlap). `importance` and `ntree` are the randomForest
# argument names; the earlier spellings `Importance` / `n.tree` were silently
# ignored (default 500 trees, importance disabled).
# NOTE(review): the vertex frame is a row subset of the unsplit
# adult.one_hot_df4, so it can contain rows that are also in
# adult.one_hot_df4Test; evaluating this model on the test set may leak.
Adult_TDA_PC_5.50.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  importance = TRUE,
  ntree = 100,
  method = "rf",
  trControl = fitControl,
  metric = "Accuracy"
)
Adult_TDA_PC_5.50.5_n2_RfFit0
## Random Forest
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8136, 8138, 8138
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7077661 0.4120403
## 55 0.7302137 0.4589437
## 108 0.7257890 0.4497935
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_PC_5.50.5_n2_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.7361179 0.4709338 Fold1
## 2 0.7224680 0.4434426 Fold3
## 3 0.7320551 0.4624548 Fold2
ad_tda_pc_5.50.5_n2_rf_fit0_re<-Adult_TDA_PC_5.50.5_n2_RfFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n2_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 12206 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 24412 matrix numeric
## oob.times 12206 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 12206 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_PC_5.50.5_n2_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V3 100.000
## V1 68.602
## V5 49.314
## V11 43.109
## V13 41.302
## V12 20.828
## V8.Husband 17.115
## V7.Exec.managerial 6.686
## V2.Private 6.347
## V2.Self.emp.not.inc 6.289
## V7.Craft.repair 5.899
## V7.Sales 5.370
## V7.Prof.specialty 4.619
## V7.Transport.moving 4.429
## V9.Black 4.236
## V2.Self.emp.inc 4.044
## V2.Local.gov 3.953
## V7.Machine.op.inspct 3.706
## V4.HS.grad 3.624
## V7.Adm.clerical 3.503
# Score the held-out test rows with the vertex-2 random forest.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of predicted vs. observed classes on the test rows.
ad_tda_pc_5.50.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
ad_tda_pc_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1956 5
## >50K 5460 2347
##
## Accuracy : 0.4405
## 95% CI : (0.4306, 0.4504)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.146
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2638
## Specificity : 0.9979
## Pos Pred Value : 0.9975
## Neg Pred Value : 0.3006
## Prevalence : 0.7592
## Detection Rate : 0.2002
## Detection Prevalence : 0.2008
## Balanced Accuracy : 0.6308
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1956 5
## >50K 5460 2347
##
## Accuracy : 0.4405
## 95% CI : (0.4306, 0.4504)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.146
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2638
## Specificity : 0.9979
## Pos Pred Value : 0.9975
## Neg Pred Value : 0.3006
## Prevalence : 0.7592
## Detection Rate : 0.2002
## Detection Prevalence : 0.2008
## Balanced Accuracy : 0.6308
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n2_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.4405201 0.1460088 0.4306427 0.4504329 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_rf_cf0$overall[1]
ad_tda_pc_5.50.5_n2_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.2637540 0.9978741 0.9974503
## Neg Pred Value Precision Recall
## 0.3006276 0.9974503 0.2637540
## F1 Prevalence Detection Rate
## 0.4171910 0.7592138 0.2002457
## Detection Prevalence Balanced Accuracy
## 0.2007576 0.6308141
ad_tda_pc_5.50.5_n2_rf_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_rf_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Per-fold accuracy difference (baseline RF minus vertex-2 RF), paired by
# fold label rather than by row position: caret does not return
# `$resample` rows in a fixed fold order, so positional subtraction can pair
# different folds. The result keeps the original shape: a one-column
# data frame named `Accuracy`.
# NOTE(review): the two models were cross-validated on different row sets,
# so even label-aligned folds are only nominally paired.
diff_tda_pca_5.50.5_rf_n2_3_fold <- local({
  accuracy_by_fold <- function(fit) {
    fit$resample$Accuracy[order(fit$resample$Resample)]
  }
  data.frame(
    Accuracy = accuracy_by_fold(adultRfFit) -
      accuracy_by_fold(Adult_TDA_PC_5.50.5_n2_RfFit0)
  )
})
diff_tda_pca_5.50.5_rf_n2_3_fold
## Accuracy
## 1 0.1216078
## 2 0.1377583
## 3 0.1261784
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n2_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n2_3_fold$probRight
bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009166667
##
## $winRight
## [1] 0.9908333
# Bayesian Correlated Test
# Arguments after the difference matrix are 0.1 and the ROPE bounds
# -0.01 / 0.01 (the same bounds passed to the sign and signed-rank tests).
# NOTE(review): if the 0.1 argument is the cross-validation correlation rho
# (as in the BayesianTestsML reference implementation), 3-fold CV would
# call for 1/3 rather than 0.1; confirm against the function definition.
bct_tda_pca_5.50.5_rf.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n2_3_fold
## $left
## [1] 0.0008007738
##
## $rope
## [1] 0.0002921139
##
## $right
## [1] 0.9989071
# Rope Plot
plot(rope(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold)
## t = 26.738, df = 2, p-value = 0.001396
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1078346 0.1491951
## sample estimates:
## mean of x
## 0.1285149
### Test set diff
diff_tda_pca_5.50.5_rf.n2_test<-(rf_cf_ov_acc-ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc)
diff_tda_pca_5.50.5_rf.n2_test
## Accuracy
## 0.4160524
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n2_test_odds.left<-bst_tda_pca_5.50.5_rf.n2_test$probLeft/bst_tda_pca_5.50.5_rf.n2_test$probRight
bst_tda_pca_5.50.5_rf.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1559333
##
## $winRight
## [1] 0.8440667
# Bayesian Correlated Test
bct_tda_pca_5.50.5_rf.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n2_test)) #bf_tda_pca_5.50.5_rf.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n2_test))
##Node3
# Random forest (caret, method "rf") fitted on the node-3 TDA training subset.
# randomForest() names these options `importance` and `ntree`; the previous
# spellings `Importance` / `n.tree` were absorbed by `...` and ignored, which
# is why the recorded summary shows err.rate of length 1500 (500 trees x 3
# columns) and a NULL importanceSD.
# NOTE(review): the printed results below predate this fix and need a re-run;
# confirm the non-TDA baseline fit uses the same ntree before comparing.
Adult_TDA_PC_5.50.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  importance = TRUE,
  ntree = 100,
  method = 'rf',
  trControl = fitControl,
  metric = 'Accuracy'
)
Adult_TDA_PC_5.50.5_n3_RfFit0
## Random Forest
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8827, 8826
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7941844 0.1885370
## 55 0.8434294 0.4983357
## 108 0.8395774 0.4903697
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_PC_5.50.5_n3_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8474960 0.5104167 Fold1
## 2 0.8382420 0.4885495 Fold3
## 3 0.8445502 0.4960410 Fold2
# Keep the first column of the per-fold resampling table (Accuracy in the printout above)
ad_tda_pc_5.50.5_n3_rf_fit0_re <- Adult_TDA_PC_5.50.5_n3_RfFit0[["resample"]][1]
summary(Adult_TDA_PC_5.50.5_n3_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 13240 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 26480 matrix numeric
## oob.times 13240 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 13240 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_PC_5.50.5_n3_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V3 100.000
## V1 73.769
## V11 63.907
## V13 39.813
## V10.Male 38.125
## V10.Female 34.484
## V5 20.554
## V12 11.902
## V2.Private 7.854
## V7.Adm.clerical 6.049
## V8.Wife 5.965
## V6.Married.civ.spouse 5.431
## V7.Sales 5.416
## V7.Exec.managerial 5.355
## V4.Some.college 5.326
## V7.Prof.specialty 5.079
## V4.HS.grad 4.928
## V7.Craft.repair 4.539
## V9.White 4.434
## V9.Black 4.423
# Score the test data with the node-3 TDA-assisted random forest
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels
ad_tda_pc_5.50.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4793 1129
## >50K 2623 1223
##
## Accuracy : 0.6159
## 95% CI : (0.6062, 0.6255)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1367
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6463
## Specificity : 0.5200
## Pos Pred Value : 0.8094
## Neg Pred Value : 0.3180
## Prevalence : 0.7592
## Detection Rate : 0.4907
## Detection Prevalence : 0.6063
## Balanced Accuracy : 0.5831
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4793 1129
## >50K 2623 1223
##
## Accuracy : 0.6159
## 95% CI : (0.6062, 0.6255)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1367
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6463
## Specificity : 0.5200
## Pos Pred Value : 0.8094
## Neg Pred Value : 0.3180
## Prevalence : 0.7592
## Detection Rate : 0.4907
## Detection Prevalence : 0.6063
## Balanced Accuracy : 0.5831
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.158886e-01 1.366520e-01 6.061585e-01 6.255496e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 3.220547e-131
# First entry of the overall statistics is the test-set Accuracy
ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc <- ad_tda_pc_5.50.5_n3_rf_cf0[["overall"]][1]
ad_tda_pc_5.50.5_n3_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.6463053 0.5199830 0.8093549
## Neg Pred Value Precision Recall
## 0.3179927 0.8093549 0.6463053
## F1 Prevalence Detection Rate
## 0.7186985 0.7592138 0.4906839
## Detection Prevalence Balanced Accuracy
## 0.6062654 0.5831441
# Entries 5 to 7 of byClass are Precision, Recall and F1
ad_tda_pc_5.50.5_n3_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n3_rf_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-by-row difference: non-TDA resample accuracies minus the node-3 TDA ones
diff_tda_pca_5.50.5_rf_n3_3_fold <- ad_rf_fit_re - ad_tda_pc_5.50.5_n3_rf_fit0_re
diff_tda_pca_5.50.5_rf_n3_3_fold
## Accuracy
## 1 0.01022968
## 2 0.02198442
## 3 0.01368332
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n3_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.09083333
##
## $winRight
## [1] 0.9091667
# Bayesian Correlated Test
bct_tda_pca_5.50.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
# Print the node-3 result; the earlier version printed the node-2 object
# (bct_tda_pca_5.50.5_rf.n2_3_fold) here by mistake.
bct_tda_pca_5.50.5_rf.n3_3_fold
# NOTE(review): the values recorded below are the stale node-2 output and
# must be regenerated to show the node-3 probabilities.
## $left
## [1] 0.0008007738
##
## $rope
## [1] 0.0002921139
##
## $right
## [1] 0.9989071
# Rope Plot: share of the fold differences inside the [-0.01, 0.01] region
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold))
#bf_tda_pca_5.50.5_rf.n3_3_fold
# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold)
## t = 4.386, df = 2, p-value = 0.04825
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.0002908355 0.0303074455
## sample estimates:
## mean of x
## 0.01529914
### Test set diff
# Single test-set accuracy gap: non-TDA RF minus node-3 TDA-assisted RF
diff_tda_pca_5.50.5_rf.n3_test <- rf_cf_ov_acc - ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n3_test
## Accuracy
## 0.2406839
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_pca_5.50.5_rf.n3_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n3_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n3_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1602333
##
## $winRight
## [1] 0.8397667
# Bayesian Correlated Test on the single test-set difference
bct_tda_pca_5.50.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n3_test))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n3_test)) #bf_tda_pca_5.50.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n3_test))
##Node4
# Random forest (caret, method "rf") fitted on the node-4 TDA training subset.
# randomForest() names these options `importance` and `ntree`; the previous
# spellings `Importance` / `n.tree` were absorbed by `...` and ignored, which
# is why the recorded summary shows err.rate of length 1500 (500 trees x 3
# columns) and a NULL importanceSD.
# NOTE(review): the printed results below predate this fix and need a re-run;
# confirm the non-TDA baseline fit uses the same ntree before comparing.
Adult_TDA_PC_5.50.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  importance = TRUE,
  ntree = 100,
  method = 'rf',
  trControl = fitControl,
  metric = 'Accuracy'
)
Adult_TDA_PC_5.50.5_n4_RfFit0
## Random Forest
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11133, 11134, 11133
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9449102 0.0000000
## 55 0.9543712 0.3859860
## 108 0.9541317 0.3900239
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_PC_5.50.5_n4_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.9545536 0.4088788 Fold1
## 2 0.9543740 0.3628334 Fold3
## 3 0.9541861 0.3862458 Fold2
# Keep the first column of the per-fold resampling table (Accuracy in the printout above)
ad_tda_pc_5.50.5_n4_rf_fit0_re <- Adult_TDA_PC_5.50.5_n4_RfFit0[["resample"]][1]
summary(Adult_TDA_PC_5.50.5_n4_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 16700 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 33400 matrix numeric
## oob.times 16700 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 16700 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_PC_5.50.5_n4_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V11 100.000
## V3 96.754
## V1 67.935
## V13 38.125
## V8.Wife 24.948
## V5 17.823
## V12 10.747
## V7.Adm.clerical 10.074
## V2.Private 7.639
## V7.Exec.managerial 6.168
## V4.Some.college 6.014
## V7.Prof.specialty 5.380
## V6.Divorced 5.335
## V9.Black 5.319
## V10.Female 5.213
## V10.Male 5.163
## V6.Married.civ.spouse 5.027
## V6.Never.married 4.949
## V8.Not.in.family 4.928
## V7.Sales 4.912
# Score the test data with the node-4 TDA-assisted random forest
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels
ad_tda_pc_5.50.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 1708
## >50K 2 644
##
## Accuracy : 0.8249
## 95% CI : (0.8173, 0.8324)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3636
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9997
## Specificity : 0.2738
## Pos Pred Value : 0.8128
## Neg Pred Value : 0.9969
## Prevalence : 0.7592
## Detection Rate : 0.7590
## Detection Prevalence : 0.9339
## Balanced Accuracy : 0.6368
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 1708
## >50K 2 644
##
## Accuracy : 0.8249
## 95% CI : (0.8173, 0.8324)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3636
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9997
## Specificity : 0.2738
## Pos Pred Value : 0.8128
## Neg Pred Value : 0.9969
## Prevalence : 0.7592
## Detection Rate : 0.7590
## Detection Prevalence : 0.9339
## Balanced Accuracy : 0.6368
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.249386e-01 3.635798e-01 8.172547e-01 8.324282e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.874619e-56 0.000000e+00
# First entry of the overall statistics is the test-set Accuracy
ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc <- ad_tda_pc_5.50.5_n4_rf_cf0[["overall"]][1]
ad_tda_pc_5.50.5_n4_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9997303 0.2738095 0.8127604
## Neg Pred Value Precision Recall
## 0.9969040 0.8127604 0.9997303
## F1 Prevalence Detection Rate
## 0.8966018 0.7592138 0.7590090
## Detection Prevalence Balanced Accuracy
## 0.9338657 0.6367699
# Entries 5 to 7 of byClass are Precision, Recall and F1
ad_tda_pc_5.50.5_n4_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n4_rf_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-by-row difference: non-TDA resample accuracies minus the node-4 TDA ones
diff_tda_pca_5.50.5_rf_n4_3_fold <- ad_rf_fit_re - ad_tda_pc_5.50.5_n4_rf_fit0_re
diff_tda_pca_5.50.5_rf_n4_3_fold
## Accuracy
## 1 -0.09682790
## 2 -0.09414761
## 3 -0.09595262
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n4_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0.9908333
##
## $winRope
## [1] 0.009166667
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold-wise differences
bct_tda_pca_5.50.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n4_3_fold
## $left
## [1] 0.9999434
##
## $rope
## [1] 1.939537e-05
##
## $right
## [1] 3.719125e-05
# Rope Plot: share of the fold differences inside the [-0.01, 0.01] region
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold))
#bf_tda_pca_5.50.5_rf.n4_3_fold
# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold)
## t = -121.21, df = 2, p-value = 6.806e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.09903792 -0.09224751
## sample estimates:
## mean of x
## -0.09564271
### Test set diff
# Single test-set accuracy gap: non-TDA RF minus node-4 TDA-assisted RF
diff_tda_pca_5.50.5_rf.n4_test <- rf_cf_ov_acc - ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n4_test
## Accuracy
## 0.03163391
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_pca_5.50.5_rf.n4_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n4_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n4_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1563
##
## $winRight
## [1] 0.8437
# Bayesian Correlated Test on the single test-set difference
bct_tda_pca_5.50.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n4_test))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n4_test)) #bf_tda_pca_5.50.5_rf.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n4_test))
##Node5
# Random forest (caret, method "rf") fitted on the node-5 TDA training subset.
# randomForest() names these options `importance` and `ntree`; the previous
# spellings `Importance` / `n.tree` were absorbed by `...` and ignored, which
# is why the recorded summary shows err.rate of length 1500 (500 trees x 3
# columns) and a NULL importanceSD.
# NOTE(review): the printed results below predate this fix and need a re-run;
# confirm the non-TDA baseline fit uses the same ntree before comparing.
Adult_TDA_PC_5.50.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  importance = TRUE,
  ntree = 100,
  method = 'rf',
  trControl = fitControl,
  metric = 'Accuracy'
)
Adult_TDA_PC_5.50.5_n5_RfFit0
## Random Forest
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9602, 9603, 9603
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9979867 0.0000000
## 55 0.9981256 0.2793028
## 108 0.9979867 0.2659022
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_PC_5.50.5_n5_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.9975010 0.1418359 Fold1
## 2 0.9985420 0.3632023 Fold3
## 3 0.9983337 0.3328701 Fold2
# Keep the first column of the per-fold resampling table (Accuracy in the printout above)
ad_tda_pc_5.50.5_n5_rf_fit0_re <- Adult_TDA_PC_5.50.5_n5_RfFit0[["resample"]][1]
summary(Adult_TDA_PC_5.50.5_n5_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 14404 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 28808 matrix numeric
## oob.times 14404 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 14404 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_PC_5.50.5_n5_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V3 100.000
## V11 75.709
## V1 53.069
## V13 37.465
## V7.Other.service 12.234
## V5 8.415
## V8.Not.in.family 7.769
## V7.Adm.clerical 7.586
## V6.Never.married 7.133
## V8.Unmarried 7.068
## V4.HS.grad 6.558
## V9.Black 6.524
## V4.Some.college 6.315
## V6.Divorced 5.049
## V9.White 4.619
## V2.Federal.gov 4.384
## V2.Private 4.010
## V8.Own.child 3.991
## V2.State.gov 3.847
## V6.Married.spouse.absent 3.597
# Score the test data with the node-5 TDA-assisted random forest
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels
ad_tda_pc_5.50.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 2125
## >50K 2 227
##
## Accuracy : 0.7822
## 95% CI : (0.7739, 0.7904)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 3.909e-08
##
## Kappa : 0.1391
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.99973
## Specificity : 0.09651
## Pos Pred Value : 0.77723
## Neg Pred Value : 0.99127
## Prevalence : 0.75921
## Detection Rate : 0.75901
## Detection Prevalence : 0.97656
## Balanced Accuracy : 0.54812
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 2125
## >50K 2 227
##
## Accuracy : 0.7822
## 95% CI : (0.7739, 0.7904)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 3.909e-08
##
## Kappa : 0.1391
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.99973
## Specificity : 0.09651
## Pos Pred Value : 0.77723
## Neg Pred Value : 0.99127
## Prevalence : 0.75921
## Detection Rate : 0.75901
## Detection Prevalence : 0.97656
## Balanced Accuracy : 0.54812
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.822482e-01 1.391173e-01 7.739288e-01 7.903989e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 3.908539e-08 0.000000e+00
# First entry of the overall statistics is the test-set Accuracy
ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc <- ad_tda_pc_5.50.5_n5_rf_cf0[["overall"]][1]
ad_tda_pc_5.50.5_n5_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.99973031 0.09651361 0.77723032
## Neg Pred Value Precision Recall
## 0.99126638 0.77723032 0.99973031
## F1 Prevalence Detection Rate
## 0.87455028 0.75921376 0.75900901
## Detection Prevalence Balanced Accuracy
## 0.97655610 0.54812196
# Entries 5 to 7 of byClass are Precision, Recall and F1
ad_tda_pc_5.50.5_n5_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n5_rf_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-by-row difference: non-TDA resample accuracies minus the node-5 TDA ones
diff_tda_pca_5.50.5_rf_n5_3_fold <- ad_rf_fit_re - ad_tda_pc_5.50.5_n5_rf_fit0_re
diff_tda_pca_5.50.5_rf_n5_3_fold
## Accuracy
## 1 -0.1397753
## 2 -0.1383156
## 3 -0.1401002
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n5_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.9912667
##
## $winRope
## [1] 0.008733333
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold-wise differences
bct_tda_pca_5.50.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n5_3_fold
## $left
## [1] 0.999988
##
## $rope
## [1] 2.995516e-06
##
## $right
## [1] 8.995439e-06
# Rope Plot: share of the fold differences inside the [-0.01, 0.01] region.
# Here the data frame itself is handed to rope() (no as.matrix conversion).
plot(
  rope(
    diff_tda_pca_5.50.5_rf_n5_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
#bf_tda_pca_5.50.5_rf.n5_3_fold
# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold)
## t = -254.01, df = 2, p-value = 1.55e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1417583 -0.1370358
## sample estimates:
## mean of x
## -0.139397
### Test set diff
# Single test-set accuracy gap: non-TDA RF minus node-5 TDA-assisted RF
diff_tda_pca_5.50.5_rf.n5_test <- rf_cf_ov_acc - ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n5_test
## Accuracy
## 0.07432432
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_pca_5.50.5_rf.n5_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n5_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n5_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1621
##
## $winRight
## [1] 0.8379
# Bayesian Correlated Test on the single test-set difference
bct_tda_pca_5.50.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n5_test))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n5_test)) #bf_tda_pca_5.50.5_rf.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Random forest (caret, method "rf") fitted on the node-1 KDE-filter TDA subset.
# randomForest() names these options `importance` and `ntree`; the previous
# spellings `Importance` / `n.tree` were absorbed by `...` and ignored, which
# is why the recorded summary shows err.rate of length 1500 (500 trees x 3
# columns) and a NULL importanceSD.
# NOTE(review): the printed results below predate this fix and need a re-run;
# confirm the non-TDA baseline fit uses the same ntree before comparing.
Adult_TDA_KDE_5.50.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  importance = TRUE,
  ntree = 100,
  method = 'rf',
  trControl = fitControl,
  metric = 'Accuracy'
)
Adult_TDA_KDE_5.50.5_n1_RfFit0
## Random Forest
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8924, 8926, 8924
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8147450 0.3981242
## 55 0.8619565 0.6270583
## 108 0.8606866 0.6263647
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n1_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8622003 0.6266351 Fold1
## 2 0.8559265 0.6143710 Fold3
## 3 0.8677427 0.6401687 Fold2
# Keep the first column of the per-fold resampling table (Accuracy in the printout above)
ad_tda_kde_5.50.5_n1_rf_fit0_re <- Adult_TDA_KDE_5.50.5_n1_RfFit0[["resample"]][1]
summary(Adult_TDA_KDE_5.50.5_n1_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 13387 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 26774 matrix numeric
## oob.times 13387 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 13387 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_KDE_5.50.5_n1_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V5 100.000
## V11 82.593
## V3 81.464
## V6.Married.civ.spouse 81.001
## V1 69.986
## V13 45.394
## V12 19.849
## V8.Husband 16.687
## V7.Exec.managerial 10.834
## V7.Prof.specialty 10.815
## V2.Private 7.020
## V2.Self.emp.not.inc 6.202
## V6.Never.married 5.443
## V7.Craft.repair 5.084
## V7.Sales 5.065
## V2.Self.emp.inc 4.529
## V2.Local.gov 4.141
## V8.Wife 3.963
## V2.Federal.gov 3.505
## V7.Transport.moving 3.438
# Score the test data with the node-1 KDE-filter TDA-assisted random forest
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels
ad_tda_kde_5.50.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7146 580
## >50K 270 1772
##
## Accuracy : 0.913
## 95% CI : (0.9072, 0.9185)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7508
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9636
## Specificity : 0.7534
## Pos Pred Value : 0.9249
## Neg Pred Value : 0.8678
## Prevalence : 0.7592
## Detection Rate : 0.7316
## Detection Prevalence : 0.7910
## Balanced Accuracy : 0.8585
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7146 580
## >50K 270 1772
##
## Accuracy : 0.913
## 95% CI : (0.9072, 0.9185)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7508
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9636
## Specificity : 0.7534
## Pos Pred Value : 0.9249
## Neg Pred Value : 0.8678
## Prevalence : 0.7592
## Detection Rate : 0.7316
## Detection Prevalence : 0.7910
## Balanced Accuracy : 0.8585
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.129812e-01 7.507791e-01 9.072164e-01 9.184982e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 3.024336e-26
# First entry of the overall statistics is the test-set Accuracy
ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc <- ad_tda_kde_5.50.5_n1_rf_cf0[["overall"]][1]
ad_tda_kde_5.50.5_n1_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9635922 0.7534014 0.9249288
## Neg Pred Value Precision Recall
## 0.8677767 0.9249288 0.9635922
## F1 Prevalence Detection Rate
## 0.9438647 0.7592138 0.7315725
## Detection Prevalence Balanced Accuracy
## 0.7909500 0.8584968
# Entries 5 to 7 of byClass are Precision, Recall and F1
ad_tda_kde_5.50.5_n1_rf_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n1_rf_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-by-row difference: non-TDA resample accuracies minus the node-1 KDE TDA ones
diff_tda_kde_5.50.5_rf_n1_3_fold <- ad_rf_fit_re - ad_tda_kde_5.50.5_n1_rf_fit0_re
diff_tda_kde_5.50.5_rf_n1_3_fold
## Accuracy
## 1 -0.004474596
## 2 0.004299869
## 3 -0.009509145
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left <-
  bst_tda_kde_5.50.5_rf.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_rf.n1_3_fold[["probRight"]]
bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold-wise differences
bct_tda_kde_5.50.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n1_3_fold
## $left
## [1] 0.1416295
##
## $rope
## [1] 0.8059252
##
## $right
## [1] 0.05244523
# Rope Plot: share of the fold differences inside the [-0.01, 0.01] region.
# Here the data frame itself is handed to rope() (no as.matrix conversion).
plot(
  rope(
    diff_tda_kde_5.50.5_rf_n1_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
#bf_tda_kde_5.50.5_rf.n1_3_fold
# One-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold)
## t = -0.80004, df = 2, p-value = 0.5076
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02058812 0.01413220
## sample estimates:
## mean of x
## -0.003227958
### Test set diff
# Single test-set accuracy gap: non-TDA RF minus node-1 KDE TDA-assisted RF
diff_tda_kde_5.50.5_rf.n1_test <- rf_cf_ov_acc - ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc
diff_tda_kde_5.50.5_rf.n1_test
## Accuracy
## -0.05640868
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n1_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test (probLeft relative to probRight)
bst_tda_kde_5.50.5_rf.n1_test_odds.left <-
  bst_tda_kde_5.50.5_rf.n1_test[["probLeft"]] /
  bst_tda_kde_5.50.5_rf.n1_test[["probRight"]]
bst_tda_kde_5.50.5_rf.n1_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n1_test
## $winLeft
## [1] 0.8411667
##
## $winRope
## [1] 0.1588333
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the single test-set difference
bct_tda_kde_5.50.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n1_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n1_test)) #bf_tda_kde_5.50.5_rf.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n1_test))
##Node2
# Random forest on the Node2 data (`tda.m_kde_adult_5.50.5.n2.vec`), tuned
# through caret with the `fitControl` resampling and selected on accuracy.
# randomForest() names these arguments `importance` and `ntree`; the earlier
# spellings `Importance` / `n.tree` match no formal argument, so they fell
# through `...` unused and the forest was grown with the defaults.
# NOTE(review): the non-TDA baseline RF should be fitted with the same
# `ntree` so the fold-wise comparisons further down stay like-for-like.
Adult_TDA_KDE_5.50.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = "rf",
  importance = TRUE,
  ntree = 100,
  trControl = fitControl,
  metric = "Accuracy"
)
# Print the tuning summary.
Adult_TDA_KDE_5.50.5_n2_RfFit0
## Random Forest
##
## 12638 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8425, 8425, 8426
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7850933 0.3200908
## 55 0.8420635 0.5930775
## 108 0.8396105 0.5886620
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n2_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8402563 0.5872528 Fold1
## 2 0.8409307 0.5955220 Fold3
## 3 0.8450036 0.5964577 Fold2
# Keep the first column of the per-fold results (Accuracy) as a one-column
# data frame; it is later subtracted from the baseline RF's per-fold results.
# NOTE(review): the rows are in the order printed above (Fold1, Fold3, Fold2),
# and subtracting two such data frames pairs rows by position -- confirm the
# baseline lists its folds in the same order, or sort both by Resample first.
# NOTE(review): this name spells KDE in upper case, unlike the n3-n5 versions.
ad_tda_KDE_5.50.5_n2_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n2_RfFit0$resample[1]
# Component-by-component overview of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n2_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 12638 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 25276 matrix numeric
## oob.times 12638 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 12638 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_KDE_5.50.5_n2_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V6.Married.civ.spouse 100.000
## V3 94.813
## V1 78.163
## V11 76.451
## V5 54.055
## V13 44.953
## V8.Husband 37.629
## V12 21.969
## V7.Exec.managerial 10.964
## V6.Never.married 8.433
## V8.Wife 8.337
## V2.Private 7.940
## V7.Prof.specialty 7.070
## V2.Self.emp.not.inc 5.941
## V7.Sales 5.904
## V7.Craft.repair 5.350
## V4.HS.grad 4.882
## V2.Local.gov 4.593
## V2.Self.emp.inc 4.587
## V7.Adm.clerical 4.277
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_RfFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7143 566
## >50K 273 1786
##
## Accuracy : 0.9141
## 95% CI : (0.9084, 0.9196)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7546
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9632
## Specificity : 0.7594
## Pos Pred Value : 0.9266
## Neg Pred Value : 0.8674
## Prevalence : 0.7592
## Detection Rate : 0.7313
## Detection Prevalence : 0.7892
## Balanced Accuracy : 0.8613
##
## 'Positive' Class : <=50K
##
# Overall test-set metrics for the Node2 model. The confusion matrix itself is
# already printed right after it is built, so it is not printed a second time.
ad_tda_kde_5.50.5_n2_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.141073e-01 7.546383e-01 9.083751e-01 9.195912e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 6.706794e-24
# Select by name rather than by position (same element as `[1]`), so the
# extraction cannot silently drift if the layout of `overall` changes.
ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc <- ad_tda_kde_5.50.5_n2_rf_cf0$overall["Accuracy"]
# Per-class metrics (positive class is the first factor level, see above).
ad_tda_kde_5.50.5_n2_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9631877 0.7593537 0.9265793
## Neg Pred Value Precision Recall
## 0.8674114 0.9265793 0.9631877
## F1 Prevalence Detection Rate
## 0.9445289 0.7592138 0.7312654
## Detection Prevalence Balanced Accuracy
## 0.7892097 0.8612707
# Precision, recall and F1 (the elements formerly taken as `[5:7]`).
ad_tda_kde_5.50.5_n2_rf_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
diff_tda_kde_5.50.5_rf_n2_3_fold<-(ad_rf_fit_re-ad_tda_KDE_5.50.5_n2_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n2_3_fold
## Accuracy
## 1 0.01746937
## 2 0.01929570
## 3 0.01322995
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n2_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n2_3_fold$probRight
bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1643667
##
## $winRight
## [1] 0.8356333
# Bayesian Correlated Test
# Run on the three per-fold accuracy differences; the result printed below
# has $left, $rope and $right entries.
# NOTE(review): if the second argument (0.1) is the fold-correlation term of
# the correlated t-test, 3-fold CV would imply 1/3 rather than 0.1 -- confirm
# against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n2_3_fold
## $left
## [1] 0.002999288
##
## $rope
## [1] 0.03937538
##
## $right
## [1] 0.9576253
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
#bf_tda_kde_5.50.5_rf.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold)
## t = 9.2757, df = 2, p-value = 0.01142
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.008934758 0.024395256
## sample estimates:
## mean of x
## 0.01666501
### Test set diff
diff_tda_kde_5.50.5_rf.n2_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n2_test
## Accuracy
## -0.05753481
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n2_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n2_test_odds.left<-bst_tda_kde_5.50.5_rf.n2_test$probLeft/bst_tda_kde_5.50.5_rf.n2_test$probRight
bst_tda_kde_5.50.5_rf.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n2_test
## $winLeft
## [1] 0.8412333
##
## $winRope
## [1] 0.1587667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Called with the test-set accuracy difference as a matrix, then 0.1, -0.01
# and 0.01 (presumably the same ROPE bounds used by the tests above).
# NOTE(review): the difference is a single value and every field of the
# result prints as NA -- presumably the test cannot estimate a spread from
# one observation; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n2_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n2_test)) #bf_tda_kde_5.50.5_rf.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n2_test))
##Node3
# Random forest on the Node3 data (`tda.m_kde_adult_5.50.5.n3.vec`), tuned
# through caret with the `fitControl` resampling and selected on accuracy.
# randomForest() names these arguments `importance` and `ntree`; the earlier
# spellings `Importance` / `n.tree` match no formal argument, so they fell
# through `...` unused and the forest was grown with the defaults.
# NOTE(review): the non-TDA baseline RF should be fitted with the same
# `ntree` so the fold-wise comparisons further down stay like-for-like.
Adult_TDA_KDE_5.50.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = "rf",
  importance = TRUE,
  ntree = 100,
  trControl = fitControl,
  metric = "Accuracy"
)
# Print the tuning summary.
Adult_TDA_KDE_5.50.5_n3_RfFit0
## Random Forest
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7756, 7756, 7756
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7686092 0.2314905
## 55 0.8347946 0.5710963
## 108 0.8309266 0.5625681
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n3_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8300670 0.5574324 Fold1
## 2 0.8321300 0.5667173 Fold3
## 3 0.8421867 0.5891393 Fold2
# Keep the first column of the per-fold results (Accuracy) as a one-column
# data frame; it is later subtracted from the baseline RF's per-fold results.
# NOTE(review): the rows are in the order printed above (Fold1, Fold3, Fold2),
# and subtracting two such data frames pairs rows by position -- confirm the
# baseline lists its folds in the same order, or sort both by Resample first.
ad_tda_kde_5.50.5_n3_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n3_RfFit0$resample[1]
# Component-by-component overview of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n3_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 11634 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 23268 matrix numeric
## oob.times 11634 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 11634 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_KDE_5.50.5_n3_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V3 100.000
## V6.Married.civ.spouse 97.792
## V1 73.869
## V11 63.571
## V13 43.473
## V5 35.861
## V8.Husband 35.502
## V12 21.177
## V6.Never.married 10.870
## V7.Exec.managerial 9.354
## V8.Wife 7.976
## V2.Private 7.408
## V2.Self.emp.not.inc 6.442
## V7.Sales 6.073
## V7.Prof.specialty 6.039
## V7.Craft.repair 5.629
## V4.Bachelors 4.988
## V4.HS.grad 4.750
## V9.White 4.219
## V7.Adm.clerical 4.153
# Predict outcome using Adult_TDA_KDE_5.50.5_n3_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_RfFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n3_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7110 551
## >50K 306 1801
##
## Accuracy : 0.9123
## 95% CI : (0.9065, 0.9178)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7512
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9587
## Specificity : 0.7657
## Pos Pred Value : 0.9281
## Neg Pred Value : 0.8548
## Prevalence : 0.7592
## Detection Rate : 0.7279
## Detection Prevalence : 0.7843
## Balanced Accuracy : 0.8622
##
## 'Positive' Class : <=50K
##
# Overall test-set metrics for the Node3 model. The confusion matrix itself is
# already printed right after it is built, so it is not printed a second time.
ad_tda_kde_5.50.5_n3_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.122645e-01 7.511850e-01 9.064793e-01 9.178026e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 7.757603e-17
# Select by name rather than by position (same element as `[1]`), so the
# extraction cannot silently drift if the layout of `overall` changes.
ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc <- ad_tda_kde_5.50.5_n3_rf_cf0$overall["Accuracy"]
# Per-class metrics (positive class is the first factor level, see above).
ad_tda_kde_5.50.5_n3_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9587379 0.7657313 0.9280773
## Neg Pred Value Precision Recall
## 0.8547698 0.9280773 0.9587379
## F1 Prevalence Detection Rate
## 0.9431585 0.7592138 0.7278870
## Detection Prevalence Balanced Accuracy
## 0.7842957 0.8622346
# Precision, recall and F1 (the elements formerly taken as `[5:7]`).
ad_tda_kde_5.50.5_n3_rf_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n3_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
diff_tda_kde_5.50.5_rf_n3_3_fold<-(ad_rf_fit_re-ad_tda_kde_5.50.5_n3_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n3_3_fold
## Accuracy
## 1 0.02765867
## 2 0.02809641
## 3 0.01604682
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n3_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n3_3_fold$probRight
bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.03763333
##
## $winRight
## [1] 0.9623667
# Bayesian Correlated Test
# Run on the three per-fold accuracy differences; the result printed below
# has $left, $rope and $right entries.
# NOTE(review): if the second argument (0.1) is the fold-correlation term of
# the correlated t-test, 3-fold CV would imply 1/3 rather than 0.1 -- confirm
# against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n3_3_fold
## $left
## [1] 0.008776313
##
## $rope
## [1] 0.03739373
##
## $right
## [1] 0.95383
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
#bf_tda_kde_5.50.5_rf.n3_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold)
## t = 6.066, df = 2, p-value = 0.02612
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.006957428 0.040910507
## sample estimates:
## mean of x
## 0.02393397
### Test set diff
diff_tda_kde_5.50.5_rf.n3_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n3_test
## Accuracy
## -0.05569206
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n3_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n3_test_odds.left<-bst_tda_kde_5.50.5_rf.n3_test$probLeft/bst_tda_kde_5.50.5_rf.n3_test$probRight
bst_tda_kde_5.50.5_rf.n3_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n3_test
## $winLeft
## [1] 0.8413667
##
## $winRope
## [1] 0.1586333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Called with the test-set accuracy difference as a matrix, then 0.1, -0.01
# and 0.01 (presumably the same ROPE bounds used by the tests above).
# NOTE(review): the difference is a single value and every field of the
# result prints as NA -- presumably the test cannot estimate a spread from
# one observation; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n3_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n3_test)) #bf_tda_kde_5.50.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n3_test))
##Node4
# Random forest on the Node4 data (`tda.m_kde_adult_5.50.5.n4.vec`), tuned
# through caret with the `fitControl` resampling and selected on accuracy.
# randomForest() names these arguments `importance` and `ntree`; the earlier
# spellings `Importance` / `n.tree` match no formal argument, so they fell
# through `...` unused and the forest was grown with the defaults.
# NOTE(review): the non-TDA baseline RF should be fitted with the same
# `ntree` so the fold-wise comparisons further down stay like-for-like.
Adult_TDA_KDE_5.50.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  method = "rf",
  importance = TRUE,
  ntree = 100,
  trControl = fitControl,
  metric = "Accuracy"
)
# Print the tuning summary.
Adult_TDA_KDE_5.50.5_n4_RfFit0
## Random Forest
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6692, 6692, 6692
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7965730 0.04215066
## 55 0.8508667 0.51856797
## 108 0.8488743 0.51302439
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n4_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8493724 0.5058215 Fold1
## 2 0.8478781 0.5128345 Fold3
## 3 0.8553497 0.5370479 Fold2
# Keep the first column of the per-fold results (Accuracy) as a one-column
# data frame; it is later subtracted from the baseline RF's per-fold results.
# NOTE(review): the rows are in the order printed above (Fold1, Fold3, Fold2),
# and subtracting two such data frames pairs rows by position -- confirm the
# baseline lists its folds in the same order, or sort both by Resample first.
ad_tda_kde_5.50.5_n4_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n4_RfFit0$resample[1]
# Component-by-component overview of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n4_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 10038 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 20076 matrix numeric
## oob.times 10038 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 10038 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_KDE_5.50.5_n4_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V3 100.000
## V6.Married.civ.spouse 86.929
## V1 68.178
## V11 54.468
## V13 39.826
## V8.Husband 38.618
## V5 20.141
## V12 19.266
## V4.Bachelors 10.960
## V6.Never.married 9.170
## V7.Exec.managerial 7.847
## V2.Private 6.771
## V7.Craft.repair 6.460
## V7.Sales 6.382
## V2.Self.emp.not.inc 6.056
## V8.Wife 5.972
## V7.Prof.specialty 5.815
## V7.Adm.clerical 4.797
## V7.Transport.moving 4.472
## V7.Tech.support 4.455
# Predict outcome using Adult_TDA_KDE_5.50.5_n4_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_RfFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n4_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6992 686
## >50K 424 1666
##
## Accuracy : 0.8864
## 95% CI : (0.8799, 0.8926)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6769
##
## Mcnemar's Test P-Value : 4.729e-15
##
## Sensitivity : 0.9428
## Specificity : 0.7083
## Pos Pred Value : 0.9107
## Neg Pred Value : 0.7971
## Prevalence : 0.7592
## Detection Rate : 0.7158
## Detection Prevalence : 0.7860
## Balanced Accuracy : 0.8256
##
## 'Positive' Class : <=50K
##
# Overall test-set metrics for the Node4 model. The confusion matrix itself is
# already printed right after it is built, so it is not printed a second time.
ad_tda_kde_5.50.5_n4_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.863636e-01 6.769042e-01 8.799032e-01 8.925927e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.213505e-222 4.728975e-15
# Select by name rather than by position (same element as `[1]`), so the
# extraction cannot silently drift if the layout of `overall` changes.
ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc <- ad_tda_kde_5.50.5_n4_rf_cf0$overall["Accuracy"]
# Per-class metrics (positive class is the first factor level, see above).
ad_tda_kde_5.50.5_n4_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9428263 0.7083333 0.9106538
## Neg Pred Value Precision Recall
## 0.7971292 0.9106538 0.9428263
## F1 Prevalence Detection Rate
## 0.9264608 0.7592138 0.7158067
## Detection Prevalence Balanced Accuracy
## 0.7860360 0.8255798
# Precision, recall and F1 (the elements formerly taken as `[5:7]`).
ad_tda_kde_5.50.5_n4_rf_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n4_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
diff_tda_kde_5.50.5_rf_n4_3_fold<-(ad_rf_fit_re-ad_tda_kde_5.50.5_n4_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n4_3_fold
## Accuracy
## 1 0.008353332
## 2 0.012348312
## 3 0.002883842
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n4_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n4_3_fold$probRight
bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.8423
##
## $winRight
## [1] 0.1577
# Bayesian Correlated Test
# Run on the three per-fold accuracy differences; the result printed below
# has $left, $rope and $right entries.
# NOTE(review): if the second argument (0.1) is the fold-correlation term of
# the correlated t-test, 3-fold CV would imply 1/3 rather than 0.1 -- confirm
# against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n4_3_fold
## $left
## [1] 0.01501928
##
## $rope
## [1] 0.7003601
##
## $right
## [1] 0.2846207
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n4_3_fold,c(-0.01,0.01)))

# The 3-fold BayesFactor / t-test block belongs with the other 3-fold tests,
# ahead of the test-set section (as in the other node sections); it had been
# left in the middle of the test-set section under the "Odds Left" heading.
#BayesFactor
#bf_tda_kde_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
#bf_tda_kde_5.50.5_rf.n4_3_fold
#t_test
# One-sample t-test of the per-fold accuracy differences against zero.
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold)
## t = 2.8659, df = 2, p-value = 0.1032
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003941154 0.019664811
## sample estimates:
## mean of x
## 0.007861829
### Test set diff
# Baseline RF test accuracy minus the Node4 model's test accuracy.
diff_tda_kde_5.50.5_rf.n4_test <- (rf_cf_ov_acc - ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n4_test
## Accuracy
## -0.02979115
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n4_test <- BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n4_test), -0.01, 0.01)
bst_tda_kde_5.50.5_rf.n4_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; it is Inf here because probRight is 0.
bst_tda_kde_5.50.5_rf.n4_test_odds.left <- bst_tda_kde_5.50.5_rf.n4_test$probLeft / bst_tda_kde_5.50.5_rf.n4_test$probRight
bst_tda_kde_5.50.5_rf.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n4_test
## $winLeft
## [1] 0.841
##
## $winRope
## [1] 0.159
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Called with the test-set accuracy difference as a matrix, then 0.1, -0.01
# and 0.01 (presumably the same ROPE bounds used by the tests above).
# NOTE(review): the difference is a single value and every field of the
# result prints as NA -- presumably the test cannot estimate a spread from
# one observation; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n4_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n4_test)) #bf_tda_kde_5.50.5_rf.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n4_test))
##Node5
# Random forest on the Node5 data (`tda.m_kde_adult_5.50.5.n5.vec`), tuned
# through caret with the `fitControl` resampling and selected on accuracy.
# randomForest() names these arguments `importance` and `ntree`; the earlier
# spellings `Importance` / `n.tree` match no formal argument, so they fell
# through `...` unused and the forest was grown with the defaults.
# NOTE(review): the non-TDA baseline RF should be fitted with the same
# `ntree` so the fold-wise comparisons further down stay like-for-like.
Adult_TDA_KDE_5.50.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  method = "rf",
  importance = TRUE,
  ntree = 100,
  trControl = fitControl,
  metric = "Accuracy"
)
# Print the tuning summary.
Adult_TDA_KDE_5.50.5_n5_RfFit0
## Random Forest
##
## 7540 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5028, 5026, 5026
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8461539 0.0000000
## 55 0.8704258 0.4170561
## 108 0.8665802 0.4107397
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n5_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8757962 0.4321815 Fold1
## 2 0.8687351 0.4186501 Fold3
## 3 0.8667462 0.4003366 Fold2
# Keep the first column of the per-fold results (Accuracy) as a one-column
# data frame; it is later subtracted from the baseline RF's per-fold results.
# NOTE(review): the rows are in the order printed above (Fold1, Fold3, Fold2),
# and subtracting two such data frames pairs rows by position -- confirm the
# baseline lists its folds in the same order, or sort both by Resample first.
ad_tda_kde_5.50.5_n5_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n5_RfFit0$resample[1]
# Component-by-component overview of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n5_RfFit0)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 7540 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 15080 matrix numeric
## oob.times 7540 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 7540 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 2 -none- list
varImp (Adult_TDA_KDE_5.50.5_n5_RfFit0)
## rf variable importance
##
## only 20 most important variables shown (out of 108)
##
## Overall
## V3 100.000
## V1 62.634
## V6.Married.civ.spouse 54.512
## V11 53.340
## V13 37.313
## V8.Husband 22.589
## V12 18.432
## V7.Exec.managerial 7.125
## V7.Craft.repair 6.959
## V2.Private 6.085
## V5 5.980
## V7.Sales 5.951
## V7.Prof.specialty 5.280
## V7.Adm.clerical 5.005
## V7.Machine.op.inspct 4.864
## V6.Never.married 4.688
## V7.Transport.moving 4.612
## V2.Self.emp.not.inc 4.596
## V7.Tech.support 4.516
## V4.HS.grad 4.323
# Predict outcome using Adult_TDA_KDE_5.50.5_n5_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n5_RfFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n5_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6679 772
## >50K 737 1580
##
## Accuracy : 0.8455
## 95% CI : (0.8382, 0.8526)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5753
##
## Mcnemar's Test P-Value : 0.3814
##
## Sensitivity : 0.9006
## Specificity : 0.6718
## Pos Pred Value : 0.8964
## Neg Pred Value : 0.6819
## Prevalence : 0.7592
## Detection Rate : 0.6838
## Detection Prevalence : 0.7628
## Balanced Accuracy : 0.7862
##
## 'Positive' Class : <=50K
##
# Overall test-set metrics for the Node5 model. The confusion matrix itself is
# already printed right after it is built, so it is not printed a second time.
ad_tda_kde_5.50.5_n5_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.455160e-01 5.753120e-01 8.381947e-01 8.526305e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.272474e-97 3.814355e-01
# Select by name rather than by position (same element as `[1]`), so the
# extraction cannot silently drift if the layout of `overall` changes.
ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc <- ad_tda_kde_5.50.5_n5_rf_cf0$overall["Accuracy"]
# Per-class metrics (positive class is the first factor level, see above).
ad_tda_kde_5.50.5_n5_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9006203 0.6717687 0.8963897
## Neg Pred Value Precision Recall
## 0.6819163 0.8963897 0.9006203
## F1 Prevalence Detection Rate
## 0.8985000 0.7592138 0.6837633
## Detection Prevalence Balanced Accuracy
## 0.7627969 0.7861945
# Precision, recall and F1 (the elements formerly taken as `[5:7]`).
ad_tda_kde_5.50.5_n5_rf_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n5_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
diff_tda_kde_5.50.5_rf_n5_3_fold<-(ad_rf_fit_re-ad_tda_kde_5.50.5_n5_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n5_3_fold
## Accuracy
## 1 -0.018070461
## 2 -0.008508708
## 3 -0.008512708
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n5_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n5_3_fold$probRight
bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.3229333
##
## $winRope
## [1] 0.6770667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Run on the three per-fold accuracy differences; the result printed below
# has $left, $rope and $right entries.
# NOTE(review): if the second argument (0.1) is the fold-correlation term of
# the correlated t-test, 3-fold CV would imply 1/3 rather than 0.1 -- confirm
# against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n5_3_fold
## $left
## [1] 0.6550469
##
## $rope
## [1] 0.3311655
##
## $right
## [1] 0.01378764
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold))
#bf_tda_kde_5.50.5_rf.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold)
## t = -3.6708, df = 2, p-value = 0.06686
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.025408059 0.002013474
## sample estimates:
## mean of x
## -0.01169729
### Test set diff
diff_tda_kde_5.50.5_rf.n5_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n5_test
## Accuracy
## 0.01105651
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n5_test_odds.left<-bst_tda_kde_5.50.5_rf.n5_test$probLeft/bst_tda_kde_5.50.5_rf.n5_test$probRight
bst_tda_kde_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4613333
##
## $winRight
## [1] 0.5386667
# Bayesian Correlated Test
# Called with the test-set accuracy difference as a matrix, then 0.1, -0.01
# and 0.01 (presumably the same ROPE bounds used by the tests above).
# NOTE(review): the difference is a single value and every field of the
# result prints as NA -- presumably the test cannot estimate a spread from
# one observation; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.50.5_rf.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n5_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n5_test)) #bf_tda_kde_5.50.5_rf.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n5_test))
##Non-TDA-Assisted
#Support Vector Machine-Radial Basis
# Baseline (non-TDA) radial-basis SVM: fitted on the full one-hot training
# frame through caret, using the `fitControl` resampling and choosing the
# tuning values by accuracy.
adultSvmFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  metric = "Accuracy",
  trControl = fitControl,
  method = "svmRadial"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15196, 15195
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.8125752 0.3347893
## 0.50 0.8143740 0.3452636
## 1.00 0.8151200 0.3663336
##
## Tuning parameter 'sigma' was held constant at a value of 0.009491164
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.009491164 and C = 1.
adultSvmFit$resample
## Accuracy Kappa Resample
## 1 0.8442806 0.53298607 Fold2
## 2 0.7540142 0.02953679 Fold1
## 3 0.8470650 0.53647790 Fold3
# Keep the first column of the per-fold results (Accuracy) as a one-column
# data frame.
# NOTE(review): the rows are in the order printed above (Fold2, Fold1, Fold3),
# which differs from the fold order printed for the RF fits earlier; if this
# is subtracted from another fit's per-fold results the rows are paired by
# position, so confirm the fold orders agree or sort by Resample first.
ad_svm_fit_re<-adultSvmFit$resample[1]
# Overview of the fitted model object (a single S4 ksvm object, see below).
summary(adultSvmFit)
## Length Class Mode
## 1 ksvm S4
#varImp (adultSvmFit)
# Score the held-out test frame with the baseline SVM.
predictions <- predict(adultSvmFit, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed class labels on the test frame.
svm_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
svm_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6997 1036
## >50K 419 1316
##
## Accuracy : 0.851
## 95% CI : (0.8438, 0.8581)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5525
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9435
## Specificity : 0.5595
## Pos Pred Value : 0.8710
## Neg Pred Value : 0.7585
## Prevalence : 0.7592
## Detection Rate : 0.7163
## Detection Prevalence : 0.8224
## Balanced Accuracy : 0.7515
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the baseline confusion matrix.
svm_cf[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.510442e-01 5.525103e-01 8.438277e-01 8.580507e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.493502e-111 1.151688e-58
# Baseline test-set accuracy, kept as a named one-element vector.
svm_cf_ov_acc <- svm_cf$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
svm_cf[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9435005 0.5595238 0.8710320
## Neg Pred Value Precision Recall
## 0.7585014 0.8710320 0.9435005
## F1 Prevalence Detection Rate
## 0.9058191 0.7592138 0.7163186
## Detection Prevalence Balanced Accuracy
## 0.8223792 0.7515122
# Precision, recall and F1 of the baseline SVM (entries 5 to 7 of byClass).
svm_cf_pr_rec_f1 <- svm_cf$byClass[c("Precision", "Recall", "F1")]
## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## Node 1
# Radial-basis SVM fitted on the Node 1 data only. Resampling is controlled by
# fitControl and candidate models are compared on accuracy.
# The data argument must be the Node 1 frame; the Node 2 frame belongs to the
# Node 2 fit further down.
# NOTE(review): the rendered output that follows this call reports 12206
# samples, the same count as the Node 2 fit, i.e. it was produced from the
# Node 2 frame. Re-knit to refresh the Node 1 results.
Adult_TDA_PC_5.50.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted Node 1 model: sample/predictor counts, resampling scheme
# and the accuracy/kappa obtained for each candidate value of C.
Adult_TDA_PC_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8138, 8137, 8137
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.5604626 0.07396875
## 0.50 0.5589061 0.08534571
## 1.00 0.5590698 0.08738347
##
## Tuning parameter 'sigma' was held constant at a value of 1.958979e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.958979e-09 and C = 0.25.
# Per-fold accuracy and kappa of the selected Node 1 model.
Adult_TDA_PC_5.50.5_n1_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.5583681 0.07058218 Fold3
## 2 0.5561563 0.06156455 Fold2
## 3 0.5668633 0.08975953 Fold1
# Keep only the per-fold accuracy column (a one-column data frame).
ad_tda_pc_5.50.5_n1_svm_fit_re <- Adult_TDA_PC_5.50.5_n1_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Score the full held-out test frame with the Node 1 SVM.
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_SvmFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed class labels on the test frame.
ad_tda_pc_5.50.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1158 283
## >50K 6258 2069
##
## Accuracy : 0.3304
## 95% CI : (0.321, 0.3398)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0192
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1561
## Specificity : 0.8797
## Pos Pred Value : 0.8036
## Neg Pred Value : 0.2485
## Prevalence : 0.7592
## Detection Rate : 0.1186
## Detection Prevalence : 0.1475
## Balanced Accuracy : 0.5179
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the Node 1 confusion matrix (accuracy, kappa,
# accuracy interval bounds, null accuracy and the two p-values).
ad_tda_pc_5.50.5_n1_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.33036446 0.01918533 0.32103757 0.33979259 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Node 1 test-set accuracy, kept as a named one-element vector.
ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n1_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.50.5_n1_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.1561489 0.8796769 0.8036086
## Neg Pred Value Precision Recall
## 0.2484688 0.8036086 0.1561489
## F1 Prevalence Detection Rate
## 0.2614881 0.7592138 0.1185504
## Detection Prevalence Balanced Accuracy
## 0.1475225 0.5179129
# Precision, recall and F1 of the Node 1 SVM (entries 5 to 7 of byClass).
ad_tda_pc_5.50.5_n1_svm_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests: non-TDA-assisted SVM vs. TDA-assisted SVM (Node 1)
### 3-fold diff
# Row-by-row accuracy difference, baseline minus Node 1.
# NOTE(review): rows are paired by position, and the two models were resampled
# on different data frames, so each row pairs unrelated folds -- confirm this
# is intended.
diff_tda_pca_5.50.5_svm_n1_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n1_svm_fit_re
diff_tda_pca_5.50.5_svm_n1_3_fold
## Accuracy
## 1 0.2859125
## 2 0.1978579
## 3 0.2802017
## Bayesian tests on the 3-fold differences
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n1_3_fold[["probLeft"]] / bst_tda_pca_5.50.5_svm.n1_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same fold differences, same bounds.
bsr_tda_pca_5.50.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0089
##
## $winRight
## [1] 0.9911
# Bayesian correlated t-test on the fold differences.
# NOTE(review): 0.1 is presumably the correlation (rho) argument; the fits
# report 3-fold cross-validation, for which 1/3 may be intended -- confirm.
bct_tda_pca_5.50.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n1_3_fold
## $left
## [1] 0.007528936
##
## $rope
## [1] 0.001247696
##
## $right
## [1] 0.9912234
# ROPE plot of the fold-wise accuracy differences over the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold)
## t = 8.9518, df = 2, p-value = 0.01225
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1322575 0.3770572
## sample estimates:
## mean of x
## 0.2546574
### Test-set diff
# Single test-set accuracy difference, baseline minus Node 1.
diff_tda_pca_5.50.5_svm.n1_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n1_test
## Accuracy
## 0.5206798
## Bayesian tests on the test-set difference
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_svm.n1_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n1_test[["probLeft"]] / bst_tda_pca_5.50.5_svm.n1_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same test-set difference, same bounds.
bsr_tda_pca_5.50.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1577333
##
## $winRight
## [1] 0.8422667
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single number and the rendered result is NA for
# left / rope / right; 0.1 is presumably the correlation (rho) -- confirm.
bct_tda_pca_5.50.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n1_test)))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n1_test)) #bf_tda_pca_5.50.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n1_test))
## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## Node 2
# Radial-basis SVM fitted on the Node 2 data only. Resampling is controlled by
# fitControl and candidate models are compared on accuracy.
Adult_TDA_PC_5.50.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted Node 2 model: sample/predictor counts, resampling scheme
# and the accuracy/kappa obtained for each candidate value of C.
Adult_TDA_PC_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8136, 8138, 8138
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.5641481 0.07100743
## 0.50 0.5642307 0.08915954
## 1.00 0.5620176 0.10430376
##
## Tuning parameter 'sigma' was held constant at a value of 1.725656e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.725656e-09 and C = 0.5.
# Per-fold accuracy and kappa of the selected Node 2 model.
Adult_TDA_PC_5.50.5_n2_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.5643735 0.07702904 Fold1
## 2 0.5629302 0.08647680 Fold3
## 3 0.5653884 0.10397280 Fold2
# Keep only the per-fold accuracy column (a one-column data frame).
ad_tda_pc_5.50.5_n2_svm_fit_re <- Adult_TDA_PC_5.50.5_n2_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n2_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Score the full held-out test frame with the Node 2 SVM.
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_SvmFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed class labels on the test frame.
ad_tda_pc_5.50.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 2074 516
## >50K 5342 1836
##
## Accuracy : 0.4003
## 95% CI : (0.3906, 0.4101)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0354
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2797
## Specificity : 0.7806
## Pos Pred Value : 0.8008
## Neg Pred Value : 0.2558
## Prevalence : 0.7592
## Detection Rate : 0.2123
## Detection Prevalence : 0.2652
## Balanced Accuracy : 0.5301
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the Node 2 confusion matrix (accuracy, kappa,
# accuracy interval bounds, null accuracy and the two p-values).
ad_tda_pc_5.50.5_n2_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.40028665 0.03544592 0.39055006 0.41008273 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Node 2 test-set accuracy, kept as a named one-element vector.
ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n2_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.50.5_n2_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.2796656 0.7806122 0.8007722
## Neg Pred Value Precision Recall
## 0.2557816 0.8007722 0.2796656
## F1 Prevalence Detection Rate
## 0.4145513 0.7592138 0.2123260
## Detection Prevalence Balanced Accuracy
## 0.2651515 0.5301389
# Precision, recall and F1 of the Node 2 SVM (entries 5 to 7 of byClass).
ad_tda_pc_5.50.5_n2_svm_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests: non-TDA-assisted SVM vs. TDA-assisted SVM (Node 2)
### 3-fold diff
# Row-by-row accuracy difference, baseline minus Node 2.
# NOTE(review): rows are paired by position, and the two models were resampled
# on different data frames, so each row pairs unrelated folds -- confirm this
# is intended.
diff_tda_pca_5.50.5_svm_n2_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n2_svm_fit_re
diff_tda_pca_5.50.5_svm_n2_3_fold
## Accuracy
## 1 0.2799072
## 2 0.1910840
## 3 0.2816766
## Bayesian tests on the 3-fold differences
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n2_3_fold[["probLeft"]] / bst_tda_pca_5.50.5_svm.n2_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same fold differences, same bounds.
bsr_tda_pca_5.50.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.01056667
##
## $winRight
## [1] 0.9894333
# Bayesian correlated t-test on the fold differences.
# NOTE(review): 0.1 is presumably the correlation (rho) argument; the fits
# report 3-fold cross-validation, for which 1/3 may be intended -- confirm.
bct_tda_pca_5.50.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n2_3_fold
## $left
## [1] 0.008537027
##
## $rope
## [1] 0.001432547
##
## $right
## [1] 0.9900304
# ROPE plot of the fold-wise accuracy differences over the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold)
## t = 8.389, df = 2, p-value = 0.01391
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1222099 0.3795686
## sample estimates:
## mean of x
## 0.2508893
### Test-set diff
# Single test-set accuracy difference, baseline minus Node 2.
diff_tda_pca_5.50.5_svm.n2_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n2_test
## Accuracy
## 0.4507576
## Bayesian tests on the test-set difference
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_svm.n2_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n2_test[["probLeft"]] / bst_tda_pca_5.50.5_svm.n2_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same test-set difference, same bounds.
bsr_tda_pca_5.50.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1614667
##
## $winRight
## [1] 0.8385333
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single number and the rendered result is NA for
# left / rope / right; 0.1 is presumably the correlation (rho) -- confirm.
bct_tda_pca_5.50.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n2_test)))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n2_test)) #bf_tda_pca_5.50.5_svm.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n2_test))
## Node 3
# Radial-basis SVM fitted on the Node 3 data only. Resampling is controlled by
# fitControl and candidate models are compared on accuracy.
Adult_TDA_PC_5.50.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted Node 3 model: sample/predictor counts, resampling scheme
# and the accuracy/kappa obtained for each candidate value of C.
Adult_TDA_PC_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8827, 8826
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.7980364 0.1721521
## 0.50 0.8018128 0.1970827
## 1.00 0.8021150 0.2059550
##
## Tuning parameter 'sigma' was held constant at a value of 1.794281e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.794281e-09 and C = 1.
# Per-fold accuracy and kappa of the selected Node 3 model.
Adult_TDA_PC_5.50.5_n3_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8012690 0.2004885 Fold2
## 2 0.8051212 0.2199626 Fold1
## 3 0.7999547 0.1974139 Fold3
# Keep only the per-fold accuracy column (a one-column data frame).
ad_tda_pc_5.50.5_n3_svm_fit_re <- Adult_TDA_PC_5.50.5_n3_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Score the full held-out test frame with the Node 3 SVM.
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_SvmFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed class labels on the test frame.
ad_tda_pc_5.50.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7389 1952
## >50K 27 400
##
## Accuracy : 0.7974
## 95% CI : (0.7893, 0.8053)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.231
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9964
## Specificity : 0.1701
## Pos Pred Value : 0.7910
## Neg Pred Value : 0.9368
## Prevalence : 0.7592
## Detection Rate : 0.7564
## Detection Prevalence : 0.9563
## Balanced Accuracy : 0.5832
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the Node 3 confusion matrix (accuracy, kappa,
# accuracy interval bounds, null accuracy and the two p-values).
ad_tda_pc_5.50.5_n3_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.973997e-01 2.309691e-01 7.892895e-01 8.053321e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.281964e-19 0.000000e+00
# Node 3 test-set accuracy, kept as a named one-element vector.
ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n3_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.50.5_n3_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9963592 0.1700680 0.7910288
## Neg Pred Value Precision Recall
## 0.9367681 0.7910288 0.9963592
## F1 Prevalence Detection Rate
## 0.8819001 0.7592138 0.7564496
## Detection Prevalence Balanced Accuracy
## 0.9562858 0.5832136
# Precision, recall and F1 of the Node 3 SVM (entries 5 to 7 of byClass).
ad_tda_pc_5.50.5_n3_svm_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests: non-TDA-assisted SVM vs. TDA-assisted SVM (Node 3)
### 3-fold diff
# Row-by-row accuracy difference, baseline minus Node 3.
# NOTE(review): rows are paired by position, and the two models were resampled
# on different data frames, so each row pairs unrelated folds -- confirm this
# is intended.
diff_tda_pca_5.50.5_svm_n3_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n3_svm_fit_re
diff_tda_pca_5.50.5_svm_n3_3_fold
## Accuracy
## 1 0.04301166
## 2 -0.05110702
## 3 0.04711033
## Bayesian tests on the 3-fold differences
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n3_3_fold[["probLeft"]] / bst_tda_pca_5.50.5_svm.n3_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left
## [1] 0.5
# Bayesian signed-rank test on the same fold differences, same bounds.
bsr_tda_pca_5.50.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0.1573667
##
## $winRope
## [1] 0.2193667
##
## $winRight
## [1] 0.6232667
# Bayesian correlated t-test on the fold differences.
# NOTE(review): 0.1 is presumably the correlation (rho) argument; the fits
# report 3-fold cross-validation, for which 1/3 may be intended -- confirm.
bct_tda_pca_5.50.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n3_3_fold
## $left
## [1] 0.2989491
##
## $rope
## [1] 0.1724151
##
## $right
## [1] 0.5286359
# ROPE plot of the fold-wise accuracy differences over the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n3_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
#bf_tda_pca_5.50.5_rf.n3_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold)
## t = 0.40542, df = 2, p-value = 0.7244
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1250148 0.1510248
## sample estimates:
## mean of x
## 0.01300499
### Test-set diff
# Single test-set accuracy difference, baseline minus Node 3.
diff_tda_pca_5.50.5_svm.n3_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n3_test
## Accuracy
## 0.05364455
## Bayesian tests on the test-set difference
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_svm.n3_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n3_test[["probLeft"]] / bst_tda_pca_5.50.5_svm.n3_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same test-set difference, same bounds.
bsr_tda_pca_5.50.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1564333
##
## $winRight
## [1] 0.8435667
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single number and the rendered result is NA for
# left / rope / right; 0.1 is presumably the correlation (rho) -- confirm.
bct_tda_pca_5.50.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n3_test)))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n3_test)) #bf_tda_pca_5.50.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n3_test))
## Node 4
# Radial-basis SVM fitted on the Node 4 data only. Resampling is controlled by
# fitControl and candidate models are compared on accuracy.
Adult_TDA_PC_5.50.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted Node 4 model: sample/predictor counts, resampling scheme
# and the accuracy/kappa obtained for each candidate value of C.
Adult_TDA_PC_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11133, 11134, 11133
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9459281 0.03567091
## 0.50 0.9475448 0.09220474
## 1.00 0.9486824 0.12855266
##
## Tuning parameter 'sigma' was held constant at a value of 0.00954656
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.00954656 and C = 1.
# Per-fold accuracy and kappa of the selected Node 4 model.
Adult_TDA_PC_5.50.5_n4_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9448437 -0.0003582824 Fold2
## 2 0.9506018 0.1845812798 Fold1
## 3 0.9506018 0.2014349837 Fold3
# Keep only the per-fold accuracy column (a one-column data frame).
ad_tda_pc_5.50.5_n4_svm_fit_re <- Adult_TDA_PC_5.50.5_n4_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Score the full held-out test frame with the Node 4 SVM.
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_SvmFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed class labels on the test frame.
ad_tda_pc_5.50.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7413 2186
## >50K 3 166
##
## Accuracy : 0.7759
## 95% CI : (0.7675, 0.7841)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 5.336e-05
##
## Kappa : 0.1027
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.99960
## Specificity : 0.07058
## Pos Pred Value : 0.77227
## Neg Pred Value : 0.98225
## Prevalence : 0.75921
## Detection Rate : 0.75891
## Detection Prevalence : 0.98270
## Balanced Accuracy : 0.53509
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the Node 4 confusion matrix (accuracy, kappa,
# accuracy interval bounds, null accuracy and the two p-values).
ad_tda_pc_5.50.5_n4_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.759009e-01 1.027270e-01 7.674988e-01 7.841382e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.336459e-05 0.000000e+00
# Node 4 test-set accuracy, kept as a named one-element vector.
ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n4_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.50.5_n4_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.99959547 0.07057823 0.77226794
## Neg Pred Value Precision Recall
## 0.98224852 0.77226794 0.99959547
## F1 Prevalence Detection Rate
## 0.87134881 0.75921376 0.75890663
## Detection Prevalence Balanced Accuracy
## 0.98269861 0.53508685
# Precision, recall and F1 of the Node 4 SVM (entries 5 to 7 of byClass).
ad_tda_pc_5.50.5_n4_svm_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests: non-TDA-assisted SVM vs. TDA-assisted SVM (Node 4)
### 3-fold diff
# Row-by-row accuracy difference, baseline minus Node 4.
# NOTE(review): rows are paired by position, and the two models were resampled
# on different data frames, so each row pairs unrelated folds -- confirm this
# is intended.
diff_tda_pca_5.50.5_svm_n4_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n4_svm_fit_re
diff_tda_pca_5.50.5_svm_n4_3_fold
## Accuracy
## 1 -0.1005631
## 2 -0.1965875
## 3 -0.1035367
## Bayesian tests on the 3-fold differences
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test (probLeft / probRight).
# probRight is 0 in the rendered sign-test result, so the ratio prints Inf.
bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n4_3_fold[["probLeft"]] / bst_tda_pca_5.50.5_svm.n4_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold differences, same bounds.
bsr_tda_pca_5.50.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0.9919
##
## $winRope
## [1] 0.0081
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): 0.1 is presumably the correlation (rho) argument; the fits
# report 3-fold cross-validation, for which 1/3 may be intended -- confirm.
bct_tda_pca_5.50.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n4_3_fold
## $left
## [1] 0.9615457
##
## $rope
## [1] 0.009110337
##
## $right
## [1] 0.02934392
# ROPE plot of the fold-wise accuracy differences over the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n4_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
#bf_tda_pca_5.50.5_rf.n4_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold)
## t = -4.2368, df = 2, p-value = 0.05145
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.269200300 0.002075403
## sample estimates:
## mean of x
## -0.1335624
### Test-set diff
# Single test-set accuracy difference, baseline minus Node 4.
diff_tda_pca_5.50.5_svm.n4_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n4_test
## Accuracy
## 0.07514333
## Bayesian tests on the test-set difference
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits).
bst_tda_pca_5.50.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_svm.n4_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n4_test[["probLeft"]] / bst_tda_pca_5.50.5_svm.n4_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same test-set difference, same bounds.
bsr_tda_pca_5.50.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1563333
##
## $winRight
## [1] 0.8436667
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single number and the rendered result is NA for
# left / rope / right; 0.1 is presumably the correlation (rho) -- confirm.
bct_tda_pca_5.50.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled for the single test-set difference)
#plot(rope(diff_tda_pca_5.50.5_svm.n4_test))
# BayesFactor (disabled)
#bf_tda_pca_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n4_test))
#bf_tda_pca_5.50.5_svm.n4_test
# t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n4_test))
##Node5
# Fit a radial-kernel SVM ("svmRadial") on the node-5 TDA/PCA training data,
# selecting the tuning parameters on Accuracy with the shared fitControl
# resampling settings.
Adult_TDA_PC_5.50.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9603, 9603, 9602
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9979867 0
## 0.50 0.9979867 0
## 1.00 0.9979867 0
##
## Tuning parameter 'sigma' was held constant at a value of 1.772804e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.772804e-09 and C = 0.25.
Adult_TDA_PC_5.50.5_n5_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.9979175 0 Fold3
## 2 0.9981254 0 Fold2
## 3 0.9979171 0 Fold1
# Keep the first column of the resampling table (per-fold Accuracy, as a
# one-column data frame) for the fold-wise comparison, then summarise the fit.
# NOTE(review): rows stay in the order stored in $resample, which is not
# Fold1..Fold3 here; later row-wise differences pair folds by position.
ad_tda_pc_5.50.5_n5_svm_fit_re <- Adult_TDA_PC_5.50.5_n5_SvmFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Score the held-out test rows with the node-5 TDA/PCA SVM.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels and print the
# confusion matrix with its accompanying statistics.
ad_tda_pc_5.50.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Store the overall test accuracy (the first entry of $overall, named
# "Accuracy"), then print the per-class statistics.
ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n5_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep Precision, Recall and F1 (entries 5 to 7 of $byClass).
ad_tda_pc_5.50.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the per-fold accuracies: ad_svm_fit_re minus the
# node-5 TDA/PCA SVM resample; printed below.
diff_tda_pca_5.50.5_svm_n5_3_fold <- ad_svm_fit_re -
  ad_tda_pc_5.50.5_n5_svm_fit_re
diff_tda_pca_5.50.5_svm_n5_3_fold
## Accuracy
## 1 -0.1536369
## 2 -0.2441112
## 3 -0.1508521
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Sign test on the node-5 per-fold differences with bounds -0.01 and 0.01;
# the result exposes probLeft / probRope / probRight.
bst_tda_pca_5.50.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test. With probRight equal to
# zero, R's division yields Inf (or NaN when probLeft is zero too).
bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n5_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Signed-rank test on the node-5 per-fold differences with bounds -0.01 and
# 0.01; the result exposes winLeft / winRope / winRight.
bsr_tda_pca_5.50.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0.9903
##
## $winRope
## [1] 0.0097
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the node-5 per-fold differences; the second
# argument is 0.1, followed by the bounds -0.01 and 0.01.
bct_tda_pca_5.50.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n5_3_fold
## $left
## [1] 0.9802949
##
## $rope
## [1] 0.003690937
##
## $right
## [1] 0.01601413
# Rope Plot
# rope() is evaluated on the node-5 per-fold differences with the interval
# c(-0.01, 0.01) and the result is plotted.
plot(rope(diff_tda_pca_5.50.5_svm_n5_3_fold,c(-0.01,0.01)))

# BayesFactor (disabled): Bayes-factor t-test on the same per-fold differences.
#bf_tda_pca_5.50.5_svm.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
#bf_tda_pca_5.50.5_svm.n5_3_fold
# t_test
# One-sample t-test of the per-fold differences against a true mean of 0.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold)
## t = -5.9696, df = 2, p-value = 0.02693
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.31466893 -0.05106451
## sample estimates:
## mean of x
## -0.1828667
### Test set diff
# Single held-out accuracy gap: svm_cf_ov_acc (presumably the non-TDA SVM's
# test accuracy) minus the node-5 TDA/PCA SVM test accuracy; printed below.
diff_tda_pca_5.50.5_svm.n5_test <- svm_cf_ov_acc -
  ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n5_test
## Accuracy
## 0.09183047
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Sign test on the single node-5 test-set difference, bounds -0.01 and 0.01.
bst_tda_pca_5.50.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight for the test-set sign test.
bst_tda_pca_5.50.5_svm.n5_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n5_test[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n5_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Signed-rank test on the single node-5 test-set difference, bounds -0.01
# and 0.01.
bsr_tda_pca_5.50.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1624667
##
## $winRight
## [1] 0.8375333
# Bayesian Correlated Test
# NOTE(review): the input is the single test-set difference, and every
# component of the recorded result is NA. Presumably the test needs more than
# one observation to estimate a variance; confirm against the
# correlatedBayesianTtest definition before reporting this result.
bct_tda_pca_5.50.5_svm.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled for the single test-set difference)
#plot(rope(diff_tda_pca_5.50.5_svm.n5_test))
# BayesFactor (disabled)
#bf_tda_pca_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n5_test))
#bf_tda_pca_5.50.5_svm.n5_test
# t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit a radial-kernel SVM ("svmRadial") on the node-1 TDA/KDE training data,
# selecting the tuning parameters on Accuracy with the shared fitControl
# resampling settings.
Adult_TDA_KDE_5.50.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8925, 8925, 8924
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.8061601 0.3585370
## 0.50 0.8086254 0.3697234
## 1.00 0.8111654 0.3890763
##
## Tuning parameter 'sigma' was held constant at a value of 0.008508323
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.008508323 and C = 1.
Adult_TDA_KDE_5.50.5_n1_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8518602 0.5853196 Fold2
## 2 0.8415509 0.5604742 Fold1
## 3 0.7400851 0.0214351 Fold3
# Keep the first column of the resampling table (per-fold Accuracy, as a
# one-column data frame) for the fold-wise comparison.
ad_tda_kde_5.50.5_n1_svm_fit_re <- Adult_TDA_KDE_5.50.5_n1_SvmFit0$resample[1]
# Summarise the KDE node-1 fit. This call previously summarised the PCA
# node-1 model (Adult_TDA_PC_5.50.5_n1_SvmFit0), a copy-paste slip.
summary(Adult_TDA_KDE_5.50.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Score the held-out test rows with the node-1 TDA/KDE SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels and print the
# confusion matrix with its accompanying statistics.
ad_tda_kde_5.50.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7008 1013
## >50K 408 1339
##
## Accuracy : 0.8545
## 95% CI : (0.8474, 0.8615)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5638
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9450
## Specificity : 0.5693
## Pos Pred Value : 0.8737
## Neg Pred Value : 0.7665
## Prevalence : 0.7592
## Detection Rate : 0.7174
## Detection Prevalence : 0.8212
## Balanced Accuracy : 0.7571
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7008 1013
## >50K 408 1339
##
## Accuracy : 0.8545
## 95% CI : (0.8474, 0.8615)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5638
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9450
## Specificity : 0.5693
## Pos Pred Value : 0.8737
## Neg Pred Value : 0.7665
## Prevalence : 0.7592
## Detection Rate : 0.7174
## Detection Prevalence : 0.8212
## Balanced Accuracy : 0.7571
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.545250e-01 5.638017e-01 8.473761e-01 8.614617e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.580820e-120 8.849231e-58
# Store the overall test accuracy (the first entry of $overall, named
# "Accuracy"), then print the per-class statistics.
ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n1_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9449838 0.5693027 0.8737065
## Neg Pred Value Precision Recall
## 0.7664568 0.8737065 0.9449838
## F1 Prevalence Detection Rate
## 0.9079484 0.7592138 0.7174447
## Detection Prevalence Balanced Accuracy
## 0.8211507 0.7571433
# Keep Precision, Recall and F1 (entries 5 to 7 of $byClass).
ad_tda_kde_5.50.5_n1_svm_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n1_svm_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the per-fold accuracies: ad_svm_fit_re minus the
# KDE node-1 SVM resample. The baseline is the SVM resample, matching the
# PCA-filter SVM comparisons; this line previously subtracted from the
# random-forest resample (ad_rf_fit_re), mixing two classifier families.
# NOTE: the outputs recorded for this comparison were produced with the RF
# baseline; re-knit to refresh them.
diff_tda_kde_5.50.5_svm_n1_3_fold <- ad_svm_fit_re -
  ad_tda_kde_5.50.5_n1_svm_fit_re
diff_tda_kde_5.50.5_svm_n1_3_fold
## Accuracy
## 1 0.005865565
## 2 0.018675501
## 3 0.118148369
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Sign test on the KDE node-1 per-fold differences with bounds -0.01 and
# 0.01; the result exposes probLeft / probRope / probRight.
bst_tda_kde_5.50.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test.
bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n1_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Signed-rank test on the KDE node-1 per-fold differences with bounds -0.01
# and 0.01; the result exposes winLeft / winRope / winRight.
bsr_tda_kde_5.50.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.2156
##
## $winRight
## [1] 0.7844
# Bayesian Correlated Test
# Correlated Bayesian t-test on the KDE node-1 per-fold differences; the
# second argument is 0.1, followed by the bounds -0.01 and 0.01.
bct_tda_kde_5.50.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n1_3_fold
## $left
## [1] 0.1476268
##
## $rope
## [1] 0.08040408
##
## $right
## [1] 0.7719691
# Rope Plot
# rope() is evaluated on the KDE node-1 per-fold differences with the
# interval c(-0.01, 0.01) and the result is plotted.
plot(rope(diff_tda_kde_5.50.5_svm_n1_3_fold,c(-0.01,0.01)))

# BayesFactor (disabled): Bayes-factor t-test on the same per-fold differences.
#bf_tda_kde_5.50.5_svm.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
#bf_tda_kde_5.50.5_svm.n1_3_fold
# t_test
# One-sample t-test of the per-fold differences against a true mean of 0.
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold)
## t = 1.3403, df = 2, p-value = 0.3121
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1051200 0.2002463
## sample estimates:
## mean of x
## 0.04756314
### Test set diff
# Single held-out accuracy gap: svm_cf_ov_acc minus the KDE node-1 SVM test
# accuracy. The baseline is the SVM test accuracy, matching the PCA-filter
# SVM comparisons; this line previously used the random-forest accuracy
# (rf_cf_ov_acc).
# NOTE: the outputs recorded for this comparison were produced with the RF
# baseline; re-knit to refresh them.
diff_tda_kde_5.50.5_svm.n1_test <- svm_cf_ov_acc -
  ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n1_test
## Accuracy
## 0.002047502
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Sign test on the single KDE node-1 test-set difference, bounds -0.01 and
# 0.01.
bst_tda_kde_5.50.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight for the test-set sign test; when both are
# zero R's division yields NaN.
bst_tda_kde_5.50.5_svm.n1_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n1_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n1_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Signed-rank test on the single KDE node-1 test-set difference, bounds
# -0.01 and 0.01.
bsr_tda_kde_5.50.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input is the single test-set difference, and every
# component of the recorded result is NA. Presumably the test needs more than
# one observation to estimate a variance; confirm against the
# correlatedBayesianTtest definition before reporting this result.
bct_tda_kde_5.50.5_svm.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n1_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n1_test)) #bf_tda_kde_5.50.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n1_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node2
# Fit a radial-kernel SVM ("svmRadial") on the node-2 TDA/KDE training data,
# selecting the tuning parameters on Accuracy with the shared fitControl
# resampling settings.
Adult_TDA_KDE_5.50.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 12638 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8425, 8425, 8426
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.7672885 0.2266295
## 0.50 0.7695040 0.2462460
## 1.00 0.7684756 0.2480235
##
## Tuning parameter 'sigma' was held constant at a value of 1.621074e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.621074e-09 and C = 0.5.
Adult_TDA_KDE_5.50.5_n2_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7728460 0.2619111 Fold1
## 2 0.7616334 0.2103098 Fold3
## 3 0.7740328 0.2665170 Fold2
ad_tda_kde_5.50.5_n2_svm_fit_re<-Adult_TDA_KDE_5.50.5_n2_SvmFit0$resample[1]
summary(Adult_TDA_KDE_5.50.5_n2_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_SvmFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7186 1875
## >50K 230 477
##
## Accuracy : 0.7845
## 95% CI : (0.7762, 0.7926)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.766e-09
##
## Kappa : 0.2257
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9690
## Specificity : 0.2028
## Pos Pred Value : 0.7931
## Neg Pred Value : 0.6747
## Prevalence : 0.7592
## Detection Rate : 0.7357
## Detection Prevalence : 0.9276
## Balanced Accuracy : 0.5859
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7186 1875
## >50K 230 477
##
## Accuracy : 0.7845
## 95% CI : (0.7762, 0.7926)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.766e-09
##
## Kappa : 0.2257
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9690
## Specificity : 0.2028
## Pos Pred Value : 0.7931
## Neg Pred Value : 0.6747
## Prevalence : 0.7592
## Detection Rate : 0.7357
## Detection Prevalence : 0.9276
## Balanced Accuracy : 0.5859
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n2_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.845004e-01 2.256840e-01 7.762111e-01 7.926197e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.765672e-09 3.458873e-281
ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n2_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9689860 0.2028061 0.7930692
## Neg Pred Value Precision Recall
## 0.6746818 0.7930692 0.9689860
## F1 Prevalence Detection Rate
## 0.8722462 0.7592138 0.7356675
## Detection Prevalence Balanced Accuracy
## 0.9276208 0.5858960
ad_tda_kde_5.50.5_n2_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_svm_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the per-fold accuracies: ad_svm_fit_re minus the
# KDE node-2 SVM resample. The baseline is the SVM resample, matching the
# PCA-filter SVM comparisons; this line previously subtracted from the
# random-forest resample (ad_rf_fit_re).
# NOTE: the outputs recorded for this comparison were produced with the RF
# baseline; re-knit to refresh them.
diff_tda_kde_5.50.5_svm_n2_3_fold <- ad_svm_fit_re -
  ad_tda_kde_5.50.5_n2_svm_fit_re
diff_tda_kde_5.50.5_svm_n2_3_fold
## Accuracy
## 1 0.08487976
## 2 0.09859295
## 3 0.08420076
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n2_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n2_3_fold$probRight
bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.007866667
##
## $winRight
## [1] 0.9921333
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n2_3_fold
## $left
## [1] 0.001481744
##
## $rope
## [1] 0.0008367022
##
## $right
## [1] 0.9976816
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
#bf_tda_kde_5.50.5_svm.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold)
## t = 19.031, df = 2, p-value = 0.00275
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.06905224 0.10939674
## sample estimates:
## mean of x
## 0.08922449
### Test set diff
# Single held-out accuracy gap: svm_cf_ov_acc minus the KDE node-2 SVM test
# accuracy. The baseline is the SVM test accuracy, matching the PCA-filter
# SVM comparisons; this line previously used the random-forest accuracy
# (rf_cf_ov_acc).
# NOTE: the outputs recorded for this comparison were produced with the RF
# baseline; re-knit to refresh them.
diff_tda_kde_5.50.5_svm.n2_test <- svm_cf_ov_acc -
  ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n2_test
## Accuracy
## 0.07207207
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n2_test_odds.left<-bst_tda_kde_5.50.5_svm.n2_test$probLeft/bst_tda_kde_5.50.5_svm.n2_test$probRight
bst_tda_kde_5.50.5_svm.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1592
##
## $winRight
## [1] 0.8408
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n2_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n2_test)) #bf_tda_kde_5.50.5_svm.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n2_test))
##Node3
# Fit a radial-kernel SVM ("svmRadial") on the node-3 TDA/KDE training data,
# selecting the tuning parameters on Accuracy with the shared fitControl
# resampling settings.
Adult_TDA_KDE_5.50.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7757, 7756, 7755
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.7616464 0.1975413
## 0.50 0.7636233 0.2132360
## 1.00 0.7640530 0.2171511
##
## Tuning parameter 'sigma' was held constant at a value of 2.398218e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 2.398218e-09 and C = 1.
Adult_TDA_KDE_5.50.5_n3_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7596699 0.2012373 Fold2
## 2 0.7627031 0.2102880 Fold1
## 3 0.7697860 0.2399280 Fold3
ad_tda_kde_5.50.5_n3_svm_fit_re<-Adult_TDA_KDE_5.50.5_n3_SvmFit0 $resample[1]
summary(Adult_TDA_KDE_5.50.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n3_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_SvmFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n3_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6906 1842
## >50K 510 510
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0.1836
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9312
## Specificity : 0.2168
## Pos Pred Value : 0.7894
## Neg Pred Value : 0.5000
## Prevalence : 0.7592
## Detection Rate : 0.7070
## Detection Prevalence : 0.8956
## Balanced Accuracy : 0.5740
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6906 1842
## >50K 510 510
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0.1836
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9312
## Specificity : 0.2168
## Pos Pred Value : 0.7894
## Neg Pred Value : 0.5000
## Prevalence : 0.7592
## Detection Rate : 0.7070
## Detection Prevalence : 0.8956
## Balanced Accuracy : 0.5740
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n3_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.592138e-01 1.835590e-01 7.506071e-01 7.676657e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.055358e-01 8.024346e-166
ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n3_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n3_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9312298 0.2168367 0.7894376
## Neg Pred Value Precision Recall
## 0.5000000 0.7894376 0.9312298
## F1 Prevalence Detection Rate
## 0.8544915 0.7592138 0.7070025
## Detection Prevalence Balanced Accuracy
## 0.8955774 0.5740333
ad_tda_kde_5.50.5_n3_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n3_svm_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the per-fold accuracies: ad_svm_fit_re minus the
# KDE node-3 SVM resample. The baseline is the SVM resample, matching the
# PCA-filter SVM comparisons; this line previously subtracted from the
# random-forest resample (ad_rf_fit_re).
# NOTE: the outputs recorded for this comparison were produced with the RF
# baseline; re-knit to refresh them.
diff_tda_kde_5.50.5_svm_n3_3_fold <- ad_svm_fit_re -
  ad_tda_kde_5.50.5_n3_svm_fit_re
diff_tda_kde_5.50.5_svm_n3_3_fold
## Accuracy
## 1 0.09805578
## 2 0.09752325
## 3 0.08844749
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n3_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n3_3_fold$probRight
bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009266667
##
## $winRight
## [1] 0.9907333
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n3_3_fold
## $left
## [1] 0.0005903999
##
## $rope
## [1] 0.0003109946
##
## $right
## [1] 0.9990986
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
#bf_tda_kde_5.50.5_svm.n3_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold)
## t = 30.366, df = 2, p-value = 0.001083
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.08126068 0.10809033
## sample estimates:
## mean of x
## 0.09467551
### Test set diff
# Single held-out accuracy gap: svm_cf_ov_acc minus the KDE node-3 SVM test
# accuracy. The baseline is the SVM test accuracy, matching the PCA-filter
# SVM comparisons; this line previously used the random-forest accuracy
# (rf_cf_ov_acc).
# NOTE: the outputs recorded for this comparison were produced with the RF
# baseline; re-knit to refresh them.
diff_tda_kde_5.50.5_svm.n3_test <- svm_cf_ov_acc -
  ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n3_test
## Accuracy
## 0.09735872
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n3_test_odds.left<-bst_tda_kde_5.50.5_svm.n3_test$probLeft/bst_tda_kde_5.50.5_svm.n3_test$probRight
bst_tda_kde_5.50.5_svm.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1556333
##
## $winRight
## [1] 0.8443667
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n3_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n3_test)) #bf_tda_kde_5.50.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n3_test))
##Node4
# Fit a radial-kernel SVM ("svmRadial") on the node-4 TDA/KDE training data,
# selecting the tuning parameters on Accuracy with the shared fitControl
# resampling settings.
Adult_TDA_KDE_5.50.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6692, 6692, 6692
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.8144053 0.1830632
## 0.50 0.8140068 0.1875297
## 1.00 0.8154015 0.1964493
##
## Tuning parameter 'sigma' was held constant at a value of 5.623657e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 5.623657e-09 and C = 1.
Adult_TDA_KDE_5.50.5_n4_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8147041 0.1872175 Fold2
## 2 0.8161984 0.2037347 Fold1
## 3 0.8153019 0.1983957 Fold3
ad_tda_kde_5.50.5_n4_svm_fit_re<-Adult_TDA_KDE_5.50.5_n4_SvmFit0 $resample[1]
summary(Adult_TDA_KDE_5.50.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n4_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_SvmFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n4_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6391 1667
## >50K 1025 685
##
## Accuracy : 0.7244
## 95% CI : (0.7154, 0.7333)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1688
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8618
## Specificity : 0.2912
## Pos Pred Value : 0.7931
## Neg Pred Value : 0.4006
## Prevalence : 0.7592
## Detection Rate : 0.6543
## Detection Prevalence : 0.8249
## Balanced Accuracy : 0.5765
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6391 1667
## >50K 1025 685
##
## Accuracy : 0.7244
## 95% CI : (0.7154, 0.7333)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1688
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8618
## Specificity : 0.2912
## Pos Pred Value : 0.7931
## Neg Pred Value : 0.4006
## Prevalence : 0.7592
## Detection Rate : 0.6543
## Detection Prevalence : 0.8249
## Balanced Accuracy : 0.5765
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n4_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.244062e-01 1.687538e-01 7.154282e-01 7.332502e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 4.613729e-35
ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n4_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n4_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8617853 0.2912415 0.7931248
## Neg Pred Value Precision Recall
## 0.4005848 0.7931248 0.8617853
## F1 Prevalence Detection Rate
## 0.8260308 0.7592138 0.6542793
## Detection Prevalence Balanced Accuracy
## 0.8249386 0.5765134
ad_tda_kde_5.50.5_n4_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n4_svm_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the per-fold accuracies: ad_svm_fit_re minus the
# KDE node-4 SVM resample. The baseline is the SVM resample, matching the
# PCA-filter SVM comparisons; this line previously subtracted from the
# random-forest resample (ad_rf_fit_re).
# NOTE: the outputs recorded for this comparison were produced with the RF
# baseline; re-knit to refresh them.
diff_tda_kde_5.50.5_svm_n4_3_fold <- ad_svm_fit_re -
  ad_tda_kde_5.50.5_n4_svm_fit_re
diff_tda_kde_5.50.5_svm_n4_3_fold
## Accuracy
## 1 0.04302159
## 2 0.04402793
## 3 0.04293166
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n4_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n4_3_fold$probRight
bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008933333
##
## $winRight
## [1] 0.9910667
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n4_3_fold
## $left
## [1] 2.894449e-05
##
## $rope
## [1] 4.515383e-05
##
## $right
## [1] 0.9999259
# ROPE plot of the 3-fold differences over the interval [-0.01, 0.01]
plot(rope(diff_tda_kde_5.50.5_svm_n4_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
# bf_tda_kde_5.50.5_svm.n4_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
# bf_tda_kde_5.50.5_svm.n4_3_fold
# One-sample t-test of the 3-fold differences against a mean of zero
t.test(x = as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold)
## t = 123.3, df = 2, p-value = 6.577e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.04181513 0.04483899
## sample estimates:
## mean of x
## 0.04332706
### Test set diff
# Test-set accuracy difference: `rf_cf_ov_acc` minus the TDA-assisted n4 SVM.
# NOTE(review): the baseline is the `rf_*` accuracy even though this section
# evaluates SVMs -- confirm that this comparison is intended.
diff_tda_kde_5.50.5_svm.n4_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n4_test
## Accuracy
## 0.1321663
## Bayesian tests on the test-set difference
# Bayesian sign test; the input here is the single test-set difference.
bst_tda_kde_5.50.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's "left" probability to its "right" probability
bst_tda_kde_5.50.5_svm.n4_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n4_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n4_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, same +/- 0.01 bounds
bsr_tda_kde_5.50.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1582667
##
## $winRight
## [1] 0.8417333
# Correlated Bayesian t-test on the test-set difference.
# NOTE(review): with the single difference supplied here every component of the
# printed result is NA -- presumably because a spread cannot be estimated from
# one value; confirm whether this call is meaningful.
bct_tda_kde_5.50.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n4_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n4_test)) #bf_tda_kde_5.50.5_svm.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n4_test))
## Node5
# Fit a radial-kernel SVM with `train()`, selecting on accuracy under the
# shared `fitControl` resampling settings.
# NOTE(review): this Node5 model is fitted on `tda.m_kde_adult_5.50.5.n3.vec`
# (an "n3" object) -- confirm whether the Node5 vector was intended.
Adult_TDA_KDE_5.50.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = "svmRadial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model's resampling and tuning summary.
Adult_TDA_KDE_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7757, 7755, 7756
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.7616475 0.1966785
## 0.50 0.7635383 0.2124213
## 1.00 0.7641398 0.2169347
##
## Tuning parameter 'sigma' was held constant at a value of 2.217989e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 2.217989e-09 and C = 1.
# Show the per-fold accuracy and kappa of the selected model.
Adult_TDA_KDE_5.50.5_n5_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7656613 0.2203667 Fold2
## 2 0.7686355 0.2361994 Fold1
## 3 0.7581227 0.1942380 Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position so a change in the layout of
# `$resample` cannot silently pick a different metric.
ad_tda_kde_5.50.5_n5_svm_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_SvmFit0$resample["Accuracy"]
# Summarise the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Score the test rows with the model trained as Adult_TDA_KDE_5.50.5_n5_SvmFit0
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels
ad_tda_kde_5.50.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6913 1845
## >50K 503 507
##
## Accuracy : 0.7596
## 95% CI : (0.751, 0.7681)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.4678
##
## Kappa : 0.1835
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9322
## Specificity : 0.2156
## Pos Pred Value : 0.7893
## Neg Pred Value : 0.5020
## Prevalence : 0.7592
## Detection Rate : 0.7077
## Detection Prevalence : 0.8966
## Balanced Accuracy : 0.5739
##
## 'Positive' Class : <=50K
##
# NOTE(review): this echoes the same confusion matrix a second time; the repeat
# looks unintentional -- confirm before removing.
ad_tda_kde_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6913 1845
## >50K 503 507
##
## Accuracy : 0.7596
## 95% CI : (0.751, 0.7681)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.4678
##
## Kappa : 0.1835
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9322
## Specificity : 0.2156
## Pos Pred Value : 0.7893
## Neg Pred Value : 0.5020
## Prevalence : 0.7592
## Detection Rate : 0.7077
## Detection Prevalence : 0.8966
## Balanced Accuracy : 0.5739
##
## 'Positive' Class : <=50K
##
# List the overall summary statistics (accuracy, kappa, interval bounds, p-values).
ad_tda_kde_5.50.5_n5_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.596233e-01 1.834781e-01 7.510214e-01 7.680701e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.678028e-01 1.415959e-168
# Keep the test-set accuracy of the Node5 SVM. It is selected by name so the
# extraction does not depend on the order of `$overall`.
ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_svm_cf0$overall["Accuracy"]
# Echo the per-class statistics of the same confusion matrix.
ad_tda_kde_5.50.5_n5_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9321737 0.2155612 0.7893355
## Neg Pred Value Precision Recall
## 0.5019802 0.7893355 0.9321737
## F1 Prevalence Detection Rate
## 0.8548287 0.7592138 0.7077191
## Detection Prevalence Balanced Accuracy
## 0.8966011 0.5738675
# Keep precision, recall and F1 (positions 5:7 of `$byClass`), selected by name
# so the extraction survives any reordering of the statistics.
ad_tda_kde_5.50.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of the baseline vs. the TDA-assisted SVM classifier
# NOTE(review): the baseline below is `ad_rf_fit_re` (named like the random
# forest resamples) although this section fits SVMs -- confirm that the RF
# baseline, rather than a non-TDA SVM baseline, is intended.
### 3-fold diff
# Per-fold accuracy difference: baseline minus the TDA-assisted Node5 SVM.
diff_tda_kde_5.50.5_svm_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n5_svm_fit_re
diff_tda_kde_5.50.5_svm_n5_3_fold
## Accuracy
## 1 0.09206446
## 2 0.09159083
## 3 0.10011077
## Bayesian tests on the 3-fold differences
# Bayesian sign test; the trailing -0.01 / 0.01 are presumably the ROPE limits
# (confirm against the helper's definition).
bst_tda_kde_5.50.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Ratio of the sign test's "left" probability to its "right" probability
bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n5_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same 3-fold differences, same +/- 0.01 bounds
bsr_tda_kde_5.50.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009333333
##
## $winRight
## [1] 0.9906667
# Correlated Bayesian t-test on the 3-fold differences.
# The second argument (0.1) is presumably the assumed fold correlation -- TODO confirm.
bct_tda_kde_5.50.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n5_3_fold
## $left
## [1] 0.0004650956
##
## $rope
## [1] 0.000245409
##
## $right
## [1] 0.9992895
# ROPE plot of the 3-fold differences over the interval [-0.01, 0.01]
plot(rope(diff_tda_kde_5.50.5_svm_n5_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
# bf_tda_kde_5.50.5_svm.n5_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
# bf_tda_kde_5.50.5_svm.n5_3_fold
# One-sample t-test of the 3-fold differences against a mean of zero
t.test(x = as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold)
## t = 34.216, df = 2, p-value = 0.0008531
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.08269433 0.10648304
## sample estimates:
## mean of x
## 0.09458869
### Test set diff
# Test-set accuracy difference: `rf_cf_ov_acc` minus the TDA-assisted Node5 SVM.
# NOTE(review): the baseline is the `rf_*` accuracy even though this section
# evaluates SVMs -- confirm that this comparison is intended.
diff_tda_kde_5.50.5_svm.n5_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n5_test
## Accuracy
## 0.09694922
## Bayesian tests on the test-set difference
# Bayesian sign test; the input here is the single test-set difference.
bst_tda_kde_5.50.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's "left" probability to its "right" probability
bst_tda_kde_5.50.5_svm.n5_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n5_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n5_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, same +/- 0.01 bounds
bsr_tda_kde_5.50.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1608667
##
## $winRight
## [1] 0.8391333
# Correlated Bayesian t-test on the test-set difference.
# NOTE(review): with the single difference supplied here every component of the
# printed result is NA -- presumably because a spread cannot be estimated from
# one value; confirm whether this call is meaningful.
bct_tda_kde_5.50.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n5_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n5_test)) #bf_tda_kde_5.50.5_svm.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n5_test))
# Non-TDA-assisted baseline
# Neural network fitted with `train()` on the one-hot training frame, selecting
# on accuracy under the shared `fitControl` resampling settings.
adultNn1Fit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 12788.553360
## final value 8389.119039
## converged
## # weights: 331
## initial value 14325.472916
## iter 10 value 8359.185647
## final value 8359.178310
## converged
## # weights: 551
## initial value 9489.912405
## iter 10 value 8386.281000
## final value 8386.271561
## converged
## # weights: 111
## initial value 10474.538577
## iter 10 value 8389.250844
## iter 20 value 7995.059789
## iter 30 value 7994.945135
## iter 40 value 7833.563480
## iter 50 value 7636.693393
## iter 60 value 7608.364754
## iter 70 value 7594.239897
## iter 80 value 7586.275133
## iter 90 value 7531.262699
## iter 100 value 7400.034087
## final value 7400.034087
## stopped after 100 iterations
## # weights: 331
## initial value 8930.514994
## iter 10 value 7873.640447
## iter 20 value 7727.766957
## iter 30 value 7723.417418
## iter 40 value 7717.619770
## iter 50 value 7709.699631
## iter 60 value 7708.875294
## final value 7708.650677
## converged
## # weights: 551
## initial value 12050.258993
## iter 10 value 8345.305524
## iter 20 value 7736.724655
## iter 30 value 7710.340866
## iter 40 value 7540.374282
## iter 50 value 7537.557728
## iter 60 value 7520.537764
## iter 70 value 7473.790449
## iter 80 value 7430.396895
## iter 90 value 6924.530326
## iter 100 value 6890.497372
## final value 6890.497372
## stopped after 100 iterations
## # weights: 111
## initial value 11724.156299
## final value 8389.120885
## converged
## # weights: 331
## initial value 9039.445732
## iter 10 value 8373.470366
## final value 8373.467130
## converged
## # weights: 551
## initial value 9367.713136
## iter 10 value 8290.800918
## iter 20 value 8290.365797
## final value 8290.289906
## converged
## # weights: 111
## initial value 9529.357058
## iter 10 value 8377.748390
## final value 8377.723891
## converged
## # weights: 331
## initial value 8477.836939
## final value 8290.395127
## converged
## # weights: 551
## initial value 8408.607042
## iter 10 value 8370.600251
## final value 8370.595169
## converged
## # weights: 111
## initial value 8736.706853
## iter 10 value 8387.763665
## final value 8387.761292
## converged
## # weights: 331
## initial value 12334.349722
## iter 10 value 8257.675924
## iter 20 value 8164.908082
## iter 30 value 7743.399507
## iter 40 value 7710.637003
## iter 50 value 7650.701357
## iter 60 value 7595.163592
## iter 70 value 7479.587214
## iter 80 value 7444.988473
## iter 90 value 7413.405359
## iter 100 value 6951.924548
## final value 6951.924548
## stopped after 100 iterations
## # weights: 551
## initial value 11467.833931
## iter 10 value 7912.798089
## iter 20 value 7789.753807
## iter 30 value 7737.632808
## iter 40 value 7727.114189
## iter 50 value 7679.242047
## iter 60 value 7596.520117
## iter 70 value 7574.744142
## iter 80 value 7540.803785
## iter 90 value 7525.448257
## iter 100 value 7509.157706
## final value 7509.157706
## stopped after 100 iterations
## # weights: 111
## initial value 8378.256321
## final value 8346.342190
## converged
## # weights: 331
## initial value 8623.254579
## final value 8350.632826
## converged
## # weights: 551
## initial value 12458.141905
## final value 8291.988281
## converged
## # weights: 111
## initial value 11497.658504
## iter 10 value 8321.386017
## final value 8318.065524
## converged
## # weights: 331
## initial value 11183.859705
## final value 8387.695367
## converged
## # weights: 551
## initial value 10392.733472
## iter 10 value 8315.720821
## final value 8302.107472
## converged
## # weights: 111
## initial value 10584.054466
## iter 10 value 8350.473900
## iter 20 value 7765.985722
## iter 30 value 7684.953467
## iter 40 value 7219.247332
## iter 50 value 6585.218147
## iter 60 value 6354.692475
## iter 70 value 6197.477102
## iter 80 value 6048.788172
## iter 90 value 6020.668021
## iter 100 value 5987.635176
## final value 5987.635176
## stopped after 100 iterations
## # weights: 331
## initial value 8479.379275
## iter 10 value 8297.412402
## iter 20 value 7750.796340
## iter 30 value 7644.881033
## iter 40 value 7634.993192
## iter 50 value 7422.145607
## iter 60 value 7016.882324
## iter 70 value 6733.796618
## iter 80 value 6637.865547
## iter 90 value 6619.706538
## iter 100 value 6491.184702
## final value 6491.184702
## stopped after 100 iterations
## # weights: 551
## initial value 13094.972997
## iter 10 value 8387.805508
## final value 8387.789581
## converged
## # weights: 111
## initial value 12169.874050
## final value 8387.697303
## converged
## # weights: 331
## initial value 9547.772622
## iter 10 value 8382.210042
## iter 20 value 8380.590981
## iter 20 value 8380.590940
## final value 8380.590386
## converged
## # weights: 551
## initial value 9702.014324
## iter 10 value 8374.895408
## final value 8374.892144
## converged
## # weights: 331
## initial value 16171.839327
## iter 10 value 12727.268744
## iter 20 value 12579.545478
## iter 30 value 12360.091427
## iter 30 value 12360.091355
## final value 12360.091355
## converged
# Print the baseline network's resampling results across the size/decay grid.
adultNn1Fit
## Neural Network
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.7604966 0.008226790
## 1 1e-04 0.7598824 0.004590314
## 1 1e-01 0.7780914 0.117934510
## 3 0e+00 0.7608476 0.010621859
## 3 1e-04 0.7601457 0.006068214
## 3 1e-01 0.7995434 0.293424323
## 5 0e+00 0.7611985 0.012612517
## 5 1e-04 0.7622078 0.019094493
## 5 1e-01 0.7934467 0.243498889
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Show the per-fold accuracy and kappa of the selected baseline network.
adultNn1Fit$resample
## Accuracy Kappa Resample
## 1 0.7925497 0.2258873 Fold1
## 2 0.8098184 0.3938902 Fold2
## 3 0.7962622 0.2604955 Fold3
# Keep the baseline's per-fold accuracies as a one-column data frame. The
# column is selected by name rather than by position.
ad_nn1_fit_re <- adultNn1Fit$resample["Accuracy"]
# Summarise the fitted network (architecture and weights).
summary(adultNn1Fit)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 1.10 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -4.78 -0.13 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o
## 1.73 -4.63 1.73 0.00
# varImp(adultNn1Fit)
# Score the test rows with the baseline network
predictions <- predict(adultNn1Fit, newdata = adult.one_hot_df4Test)
# Cross-tabulate the predictions against the observed test labels
nn1_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nn1_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 2285
## >50K 2 67
##
## Accuracy : 0.7659
## 95% CI : (0.7573, 0.7742)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.06312
##
## Kappa : 0.0422
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.99973
## Specificity : 0.02849
## Pos Pred Value : 0.76441
## Neg Pred Value : 0.97101
## Prevalence : 0.75921
## Detection Rate : 0.75901
## Detection Prevalence : 0.99294
## Balanced Accuracy : 0.51411
##
## 'Positive' Class : <=50K
##
# List the overall summary statistics (accuracy, kappa, interval bounds, p-values).
nn1_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76586814 0.04220316 0.75734084 0.77423664 0.75921376
## AccuracyPValue McnemarPValue
## 0.06311549 0.00000000
# Keep the baseline network's test-set accuracy. It is selected by name so the
# extraction does not depend on the order of `$overall`.
nn1_cf_ov_acc <- nn1_cf$overall["Accuracy"]
# Echo the per-class statistics of the same confusion matrix.
nn1_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.99973031 0.02848639 0.76440870
## Neg Pred Value Precision Recall
## 0.97101449 0.76440870 0.99973031
## F1 Prevalence Detection Rate
## 0.86637453 0.75921376 0.75900901
## Detection Prevalence Balanced Accuracy
## 0.99293612 0.51410835
# Keep precision, recall and F1 (positions 5:7 of `$byClass`), selected by name
# so the extraction survives any reordering of the statistics.
nn1_cf_pre_rec_f1 <- nn1_cf$byClass[c("Precision", "Recall", "F1")]
## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## Node1
# Neural network 1, fitted with `train()` on the Node1 vector and selected on
# accuracy under the shared `fitControl` resampling settings.
Adult_TDA_PC_5.50.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 1433.514756
## final value 401.565149
## converged
## # weights: 331
## initial value 2157.289911
## final value 401.565149
## converged
## # weights: 551
## initial value 2600.926981
## final value 400.182690
## converged
## # weights: 111
## initial value 2325.464640
## iter 10 value 403.144218
## iter 20 value 402.875146
## iter 30 value 400.672774
## iter 40 value 394.237785
## iter 50 value 394.077794
## iter 60 value 392.550957
## final value 392.550028
## converged
## # weights: 331
## initial value 1785.159913
## iter 10 value 407.626121
## iter 20 value 398.815097
## iter 30 value 394.884057
## iter 40 value 392.496650
## iter 50 value 391.987659
## final value 391.984313
## converged
## # weights: 551
## initial value 2133.572495
## iter 10 value 627.558520
## iter 20 value 409.101366
## iter 30 value 398.402020
## iter 40 value 398.290292
## iter 50 value 393.621433
## iter 60 value 393.166413
## iter 70 value 391.687475
## iter 80 value 391.677293
## iter 90 value 391.673981
## iter 100 value 391.551269
## final value 391.551269
## stopped after 100 iterations
## # weights: 111
## initial value 2548.121076
## iter 10 value 401.589723
## final value 401.495230
## converged
## # weights: 331
## initial value 1520.513350
## iter 10 value 399.379999
## iter 20 value 399.371704
## iter 20 value 399.371703
## iter 20 value 399.371703
## final value 399.371703
## converged
## # weights: 551
## initial value 1404.782274
## iter 10 value 399.163566
## iter 20 value 399.118644
## iter 20 value 399.118642
## iter 20 value 399.118642
## final value 399.118642
## converged
## # weights: 111
## initial value 1410.811282
## iter 10 value 404.628528
## iter 20 value 404.615530
## final value 404.615507
## converged
## # weights: 331
## initial value 1424.368635
## final value 405.188657
## converged
## # weights: 551
## initial value 2233.268204
## iter 10 value 405.188657
## iter 10 value 405.188657
## iter 10 value 405.188657
## final value 405.188657
## converged
## # weights: 111
## initial value 1851.383385
## iter 10 value 406.087580
## iter 20 value 405.878186
## iter 30 value 405.833600
## final value 405.832583
## converged
## # weights: 331
## initial value 1949.264408
## iter 10 value 429.725878
## iter 20 value 400.326839
## iter 30 value 395.245256
## iter 40 value 389.268638
## iter 50 value 387.952179
## iter 60 value 387.532035
## iter 70 value 387.527307
## iter 80 value 387.167430
## final value 387.054265
## converged
## # weights: 551
## initial value 4056.146122
## iter 10 value 405.994701
## iter 20 value 405.455628
## iter 30 value 396.167370
## iter 40 value 396.154176
## iter 50 value 396.150897
## iter 60 value 396.149050
## iter 70 value 396.101972
## iter 80 value 396.046879
## iter 90 value 396.024288
## iter 100 value 395.973880
## final value 395.973880
## stopped after 100 iterations
## # weights: 111
## initial value 2851.518161
## final value 405.260777
## converged
## # weights: 331
## initial value 4850.974678
## iter 10 value 404.851149
## final value 404.841118
## converged
## # weights: 551
## initial value 3058.124604
## iter 10 value 402.839806
## iter 20 value 402.807782
## iter 30 value 397.867197
## iter 40 value 395.966942
## iter 50 value 395.404719
## final value 395.373616
## converged
## # weights: 111
## initial value 1057.065473
## iter 10 value 397.209430
## final value 397.079618
## converged
## # weights: 331
## initial value 2486.540786
## iter 10 value 401.260696
## iter 20 value 401.214798
## final value 401.214756
## converged
## # weights: 551
## initial value 2108.596103
## final value 400.538296
## converged
## # weights: 111
## initial value 3532.254073
## iter 10 value 397.981475
## iter 20 value 394.440169
## iter 30 value 393.349167
## iter 40 value 393.270587
## iter 50 value 391.663427
## iter 60 value 384.541467
## iter 70 value 371.638406
## iter 80 value 361.687354
## iter 90 value 256.939084
## iter 100 value 233.687055
## final value 233.687055
## stopped after 100 iterations
## # weights: 331
## initial value 2336.800165
## iter 10 value 402.036792
## iter 20 value 395.328606
## iter 30 value 393.887192
## iter 40 value 388.900423
## iter 50 value 372.635580
## iter 60 value 330.468293
## iter 70 value 313.825876
## iter 80 value 256.888057
## iter 90 value 238.726987
## iter 100 value 229.042759
## final value 229.042759
## stopped after 100 iterations
## # weights: 551
## initial value 3652.948225
## iter 10 value 408.614243
## iter 20 value 394.380756
## iter 30 value 393.077234
## iter 40 value 390.691884
## iter 50 value 389.441149
## iter 60 value 383.445919
## iter 70 value 381.315711
## iter 80 value 376.789414
## iter 90 value 346.029009
## iter 100 value 305.900243
## final value 305.900243
## stopped after 100 iterations
## # weights: 111
## initial value 3520.124367
## final value 401.540696
## converged
## # weights: 331
## initial value 1733.320568
## iter 10 value 401.544764
## final value 401.544755
## converged
## # weights: 551
## initial value 1736.721049
## iter 10 value 400.393356
## iter 20 value 400.387873
## iter 30 value 397.554935
## iter 40 value 396.774029
## iter 50 value 395.575322
## iter 60 value 395.053893
## iter 70 value 394.822481
## iter 80 value 394.744657
## iter 90 value 394.143307
## iter 100 value 393.395083
## final value 393.395083
## stopped after 100 iterations
## # weights: 111
## initial value 2683.872966
## iter 10 value 605.431356
## iter 20 value 599.665056
## iter 30 value 595.284711
## iter 40 value 591.695562
## iter 50 value 586.061543
## iter 60 value 560.213796
## iter 70 value 487.607495
## iter 80 value 422.202477
## iter 90 value 375.851443
## iter 100 value 356.141733
## final value 356.141733
## stopped after 100 iterations
# Print the Node1 network's resampling results across the size/decay grid.
Adult_TDA_PC_5.50.5_n1_NN1Fit0
## Neural Network
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3278, 3279, 3277
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.9733579 0.00000000
## 1 1e-04 0.9733579 0.00000000
## 1 1e-01 0.9733579 0.00000000
## 3 0e+00 0.9733579 0.00000000
## 3 1e-04 0.9733579 0.00000000
## 3 1e-01 0.9717318 0.02020914
## 5 0e+00 0.9733579 0.00000000
## 5 1e-04 0.9733579 0.00000000
## 5 1e-01 0.9733579 0.00000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 1 and decay = 0.1.
# Show the per-fold accuracy and kappa of the selected Node1 network.
Adult_TDA_PC_5.50.5_n1_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.9731544 0 Fold1
## 2 0.9737485 0 Fold2
## 3 0.9731707 0 Fold3
# Keep the Node1 network's per-fold accuracies as a one-column data frame. The
# column is selected by name rather than by position.
ad_tda_pc_5.50.5_n1_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n1_NN1Fit0$resample["Accuracy"]
# Summarise the fitted network (architecture and weights).
summary(Adult_TDA_PC_5.50.5_n1_NN1Fit0)
## a 108-1-1 network with 111 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 3.57 -0.09 1.63 0.09 0.69 0.00 2.49 -1.43
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -0.92 1.02 0.00 0.00 0.24 0.44 0.54 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.39 1.25 0.22 -0.09 0.43 -0.23 -2.00 1.85
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## -0.69 0.00 -0.85 2.09 -0.23 0.43 0.62 2.45
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.07 0.00 0.00 1.63 1.39 0.04 -0.91
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## -0.61 -0.74 0.04 0.93 0.37 0.00 -0.62 0.87
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.98 0.86 -0.67 2.76 0.67 0.13 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.01 0.48 0.34 1.82 0.68 0.26 0.01 3.56
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 -0.08 0.97 0.00 0.42 -1.80 0.12
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 1.29 0.00 0.01 0.35 0.92 0.14 0.91 0.49
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.04 0.01 -1.48 -2.15
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.02 0.63 0.00 0.11 0.02 0.63 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.38 0.06 0.29 0.21 0.34 0.30 1.15
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.11 0.00 -0.94 0.00 0.02
## b->o h1->o
## 0.76 7.32
# Score the test rows with the model trained as Adult_TDA_PC_5.50.5_n1_NN1Fit0
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels
ad_tda_pc_5.50.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): this echoes the same confusion matrix a second time; the repeat
# looks unintentional -- confirm before removing.
ad_tda_pc_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# List the overall summary statistics (accuracy, kappa, interval bounds, p-values).
ad_tda_pc_5.50.5_n1_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Keep the Node1 network's test-set accuracy. It is selected by name so the
# extraction does not depend on the order of `$overall`.
ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n1_nn1_cf0$overall["Accuracy"]
# Echo the per-class statistics of the same confusion matrix.
ad_tda_pc_5.50.5_n1_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.0000000 1.0000000 NaN
## Neg Pred Value Precision Recall
## 0.2407862 NA 0.0000000
## F1 Prevalence Detection Rate
## NA 0.7592138 0.0000000
## Detection Prevalence Balanced Accuracy
## 0.0000000 0.5000000
# Keep precision, recall and F1 (positions 5:7 of `$byClass`), selected by name
# so the extraction survives any reordering of the statistics.
ad_tda_pc_5.50.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy difference: baseline network minus the TDA-assisted Node1 network.
diff_tda_pca_5.50.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n1_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n1_3_fold
## Accuracy
## 1 -0.1806047
## 2 -0.1639301
## 3 -0.1769086
## Bayesian tests on the 3-fold differences
# Bayesian sign test; the trailing -0.01 / 0.01 are presumably the ROPE limits
# (confirm against the helper's definition).
bst_tda_pca_5.50.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Ratio of the sign test's "left" probability to its "right" probability.
# The printed `probRight` is 0 for this comparison, so the ratio prints as Inf.
bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n1_3_fold[["probRight"]]
bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same 3-fold differences, same +/- 0.01 bounds
bsr_tda_pca_5.50.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n1_3_fold
## $winLeft
## [1] 0.9914
##
## $winRope
## [1] 0.0086
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the 3-fold differences.
# The second argument (0.1) is presumably the assumed fold correlation -- TODO confirm.
bct_tda_pca_5.50.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n1_3_fold
## $left
## [1] 0.9993661
##
## $rope
## [1] 0.0001302352
##
## $right
## [1] 0.0005036318
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nn1_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold))
#bf_tda_pca_5.50.5_nn1.n1_3_fold
# One-sample t-test on the three fold-wise accuracy differences
# (per the printed result: two-sided, null hypothesis mean = 0, df = 2).
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold)
## t = -34.378, df = 2, p-value = 0.0008451
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1955688 -0.1520601
## sample estimates:
## mean of x
## -0.1738144
### Test set diff
# Held-out accuracy gap: nn1_cf_ov_acc minus the node-1 TDA model's test accuracy.
diff_tda_pca_5.50.5_nn1.n1_test <- nn1_cf_ov_acc - ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n1_test
## Accuracy
## 0.5250819
## Bayesian Tests Test set diff
# NOTE(review): the input below is a single difference, so each test sees
# exactly one observation — confirm these results are meant to be interpreted.
# Bayesian Sign Test
bst_tda_pca_5.50.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test), -0.01, 0.01
)
bst_tda_pca_5.50.5_nn1.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_pca_5.50.5_nn1.n1_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n1_test[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n1_test[["probRight"]]
bst_tda_pca_5.50.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test), -0.01, 0.01
)
bsr_tda_pca_5.50.5_nn1.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1585333
##
## $winRight
## [1] 0.8414667
# Bayesian Correlated Test: prints NA for left/rope/right on this one-value
# input (presumably no spread can be estimated from a single difference).
bct_tda_pca_5.50.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_nn1.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n1_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n1_test))
#bf_tda_pca_5.50.5_nn1.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node2
##Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec, size=2, range = 0.6, type='class')
# Neural Network 1: caret-tuned nnet fitted on the node-2 TDA subset.
# Resampling comes from fitControl (defined elsewhere); candidates are ranked
# by Accuracy. The optimiser trace for every candidate fit is printed below.
Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = 'nnet',
  trControl = fitControl,
  metric = 'Accuracy'
)
## # weights: 111
## initial value 5607.876099
## final value 5607.438951
## converged
## # weights: 331
## initial value 5735.455529
## iter 10 value 5598.930746
## final value 5598.926078
## converged
## # weights: 551
## initial value 6047.566074
## iter 10 value 5579.408092
## final value 5579.391908
## converged
## # weights: 111
## initial value 5800.886111
## iter 10 value 5607.455321
## iter 20 value 5607.440736
## final value 5607.440622
## converged
## # weights: 331
## initial value 5664.820732
## iter 10 value 5408.177239
## iter 20 value 5368.399441
## iter 30 value 5352.530152
## iter 40 value 5191.916552
## iter 50 value 5158.965193
## iter 60 value 5154.453854
## iter 70 value 4812.388812
## iter 80 value 4558.216801
## iter 90 value 4462.884365
## iter 100 value 4453.258983
## final value 4453.258983
## stopped after 100 iterations
## # weights: 551
## initial value 6061.381899
## iter 10 value 5555.188679
## iter 20 value 5518.749362
## iter 30 value 5489.970104
## iter 40 value 5407.626597
## iter 50 value 5361.352233
## iter 60 value 5361.220120
## iter 70 value 5354.887308
## iter 80 value 5350.713310
## iter 90 value 5347.519382
## iter 100 value 5333.995099
## final value 5333.995099
## stopped after 100 iterations
## # weights: 111
## initial value 6252.983273
## final value 5607.440681
## converged
## # weights: 331
## initial value 7193.459206
## iter 10 value 5604.425349
## final value 5604.420229
## converged
## # weights: 551
## initial value 5794.790497
## iter 10 value 5598.949337
## final value 5598.948174
## converged
## # weights: 111
## initial value 5768.462160
## iter 10 value 5603.365814
## final value 5603.362899
## converged
## # weights: 331
## initial value 6532.325537
## iter 10 value 5600.943949
## final value 5600.928895
## converged
## # weights: 551
## initial value 5643.169581
## iter 10 value 5588.126472
## final value 5588.123342
## converged
## # weights: 111
## initial value 6342.187532
## iter 10 value 5607.623873
## final value 5607.620006
## converged
## # weights: 331
## initial value 5680.388523
## iter 10 value 5606.390660
## iter 20 value 5334.078519
## iter 30 value 5326.153844
## final value 5326.149995
## converged
## # weights: 551
## initial value 5703.528309
## iter 10 value 5606.393025
## iter 20 value 5289.121827
## iter 30 value 5230.151453
## iter 40 value 5199.493063
## iter 50 value 5098.579769
## iter 60 value 4886.547088
## iter 70 value 4821.911804
## iter 80 value 4601.041210
## iter 90 value 4536.775843
## iter 100 value 4474.470669
## final value 4474.470669
## stopped after 100 iterations
## # weights: 111
## initial value 5900.730339
## iter 10 value 5339.267986
## iter 20 value 5301.732672
## iter 30 value 5226.367495
## iter 40 value 5189.797763
## iter 50 value 5185.547922
## iter 60 value 5184.545884
## iter 70 value 5179.334854
## iter 80 value 5179.220936
## iter 90 value 5176.469028
## iter 100 value 5168.203933
## final value 5168.203933
## stopped after 100 iterations
## # weights: 331
## initial value 5721.324085
## final value 5607.624093
## converged
## # weights: 551
## initial value 5591.099026
## iter 10 value 5469.068721
## iter 20 value 5465.311718
## iter 30 value 5317.530066
## iter 40 value 5314.474026
## iter 50 value 5314.470131
## iter 50 value 5314.470128
## iter 50 value 5314.470114
## final value 5314.470114
## converged
## # weights: 111
## initial value 5743.963390
## iter 10 value 5602.757357
## final value 5602.754555
## converged
## # weights: 331
## initial value 5745.832614
## iter 10 value 5602.146313
## final value 5602.146098
## converged
## # weights: 551
## initial value 5911.465776
## final value 5608.225885
## converged
## # weights: 111
## initial value 6820.819861
## iter 10 value 5552.779178
## iter 20 value 5542.708372
## iter 30 value 5542.478992
## iter 40 value 5386.176326
## iter 50 value 5311.508366
## iter 60 value 5309.827601
## iter 70 value 5269.499333
## iter 80 value 5212.111587
## iter 90 value 5160.775942
## iter 100 value 5113.545542
## final value 5113.545542
## stopped after 100 iterations
## # weights: 331
## initial value 6237.839329
## iter 10 value 5605.614658
## iter 20 value 5605.592911
## iter 30 value 5349.688222
## iter 40 value 5335.192391
## iter 50 value 5332.510359
## iter 60 value 5327.456408
## iter 70 value 5321.007105
## iter 80 value 5306.766060
## iter 90 value 5285.529799
## iter 100 value 5216.449418
## final value 5216.449418
## stopped after 100 iterations
## # weights: 551
## initial value 6484.690170
## iter 10 value 5372.726873
## iter 20 value 5233.152436
## iter 30 value 5152.795264
## iter 40 value 4993.872333
## iter 50 value 4796.481116
## iter 60 value 4576.495932
## iter 70 value 4541.187994
## iter 80 value 4486.897704
## iter 90 value 4404.901516
## iter 100 value 4310.826482
## final value 4310.826482
## stopped after 100 iterations
## # weights: 111
## initial value 5666.854577
## final value 5608.227552
## converged
## # weights: 331
## initial value 6021.163120
## iter 10 value 5607.633517
## final value 5607.630322
## converged
## # weights: 551
## initial value 6226.115771
## iter 10 value 5602.772949
## final value 5602.771749
## converged
## # weights: 551
## initial value 8490.573899
## iter 10 value 8341.416186
## iter 20 value 8272.420386
## iter 30 value 7943.233730
## iter 40 value 7805.117832
## iter 50 value 7784.484008
## iter 60 value 7769.156248
## iter 70 value 7760.415003
## iter 80 value 7736.962852
## iter 90 value 7680.897033
## iter 100 value 7625.137745
## final value 7625.137745
## stopped after 100 iterations
# Print the tuning summary: resampled Accuracy/Kappa for each (size, decay)
# candidate and the combination selected for the final model.
Adult_TDA_PC_5.50.5_n2_NN1Fit0
## Neural Network
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8137, 8137, 8138
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.5447321 0.00000000
## 1 1e-04 0.5437491 0.04885036
## 1 1e-01 0.5457154 0.05094844
## 3 0e+00 0.5447321 0.00000000
## 3 1e-04 0.5447321 0.00000000
## 3 1e-01 0.6133835 0.20156431
## 5 0e+00 0.5447321 0.00000000
## 5 1e-04 0.5447321 0.00000000
## 5 1e-01 0.6588604 0.29018640
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Per-fold Accuracy/Kappa of the selected tuning combination.
Adult_TDA_PC_5.50.5_n2_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7084562 0.4199237 Fold3
## 2 0.5446056 0.0000000 Fold1
## 3 0.7235193 0.4506355 Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the other classifier.
ad_tda_pc_5.50.5_n2_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n2_NN1Fit0[["resample"]]["Accuracy"]
# Print the final network's architecture and fitted weights.
summary(Adult_TDA_PC_5.50.5_n2_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -0.01 0.00 0.01 -0.01 0.00 -0.01 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.01 0.00 0.01 0.00 -0.01 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.01 -0.01 0.01 -0.01 0.01 -0.01 -0.01
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.01 0.00 0.00 0.01 0.01 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.01 -0.01 -0.01 0.00 0.00 0.00 0.00 -0.01
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 -0.01 0.00 -0.01 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -0.01 0.00 0.00 0.01 0.00 -0.01 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 -0.01 0.00 -0.01 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.01
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 -0.01 0.01 0.00 0.01 0.01 0.00 0.01
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.01 0.00 0.00 -0.01 -0.01 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.01 0.00 0.00 -0.01 0.01 0.01 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.01 -0.01 0.00 0.00 0.01 0.01 0.01
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.01 -0.01 0.01 0.01
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 1.81 -0.01 -0.34 -1.82 2.57 0.00 1.67 -0.47
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.50 -0.30 0.01 0.00 2.02 -0.96 0.03 0.01
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## -0.80 0.80 0.16 -0.26 -0.53 -0.57 -0.09 3.66
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -1.22 -0.01 -0.12 -0.30 0.34 -0.22 0.00 2.42
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 -0.13 -0.09 -0.16 -0.34 -0.92 0.00 1.64
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -2.25 1.45 -0.11 1.87 0.15 0.00 0.23 0.12
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.38 -0.41 0.03 2.80 -0.37 0.00 0.16 -0.29
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.49 0.40 0.03 -0.34 -0.44 2.16 -0.79 2.60
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 -0.06 -0.11 0.01 2.39 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 -0.01 -0.01 0.00 -0.16 0.87 -0.20
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 -0.01 -0.01 0.00 0.00 0.01 0.03
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.03 0.00 -0.01 0.00 0.25 -0.01 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.01 -0.02 0.00 -0.04 -0.56 0.00 0.05 -0.03
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 -0.01 -0.70 0.04 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## -0.01 0.00 0.00 0.01 -0.01 0.01 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 -0.01 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## -0.01 -0.01 0.01 0.00 0.01 0.00 0.00 0.01
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 -0.01 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.01 -0.01 0.00 -0.01 0.01 0.00 0.00 0.01
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.01 0.00 0.01 -0.01 0.00 0.01 0.01 0.01
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 -0.01 0.01 0.01 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.01 0.00 0.00 0.01 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.02 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.01 0.01 0.01 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 -0.01 0.01 0.00 0.00 0.00 0.01 -0.01
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 -0.01 0.00 0.00 -0.01
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.01 0.00 0.00 0.01 0.00 0.00 -0.01 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## -0.01 0.01 0.01 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.00 0.01 -0.01 -0.01 0.01 0.01 -0.01 0.01
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## -0.01 0.00 0.00 0.11 -0.01 0.00 -0.01 0.01
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.01 0.00 0.00 -0.01 0.00 -0.01 0.01
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 -0.01 0.00 0.00 0.00 -0.01 -0.01 0.00
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## -0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.00 0.00 0.00 0.00 0.01 -0.01 0.00 -0.01
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 -0.01
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## -0.01 0.00 0.00 0.00 0.00 0.00 0.01 -0.01
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.01 0.00 0.00 0.01 0.00 -0.01 0.01 0.01
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 -0.01 -0.01 0.00 0.00 0.01
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 -0.01 0.00 0.00 0.00 0.00 0.01 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 -0.01 0.01 0.00 0.01 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.01 0.00 -0.01 0.01 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## -0.01 0.01 0.00 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 -0.01 0.00 -0.01 0.00 0.00 -0.01 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 -0.01 -0.18 0.00 0.01 0.01 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.01 0.01 0.00 0.00 0.00 -0.01 0.01
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 -0.01 0.00 0.00 0.00 0.00 -0.01 -0.01
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.01 0.01 0.01 0.00 0.00 0.00 -0.01 -0.01
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.01 0.00 0.00 -0.01 0.01 -0.01
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 -0.01
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 -0.01
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.04 0.01 0.00 0.00 0.00 0.01 -0.01 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## -0.01 -0.01 0.01 0.00 0.01 0.00 0.01 -0.01
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.01 0.00 -0.01 0.00 0.00 -0.01 -0.01 -0.01
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## -0.01 -0.01 0.00 0.00 0.00 0.00 0.01 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## -0.01 0.00 0.00 -0.01 -0.01 0.00 -0.01 -0.01
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## -0.01 0.00 0.00 -0.01 -0.01
## b->o h1->o h2->o h3->o h4->o h5->o
## 2.60 0.29 -4.91 1.66 0.28 0.56
# Predict class labels for the held-out test data with the node-2 TDA model
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_NN1Fit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the true test labels to assess model
# fit/performance; the first factor level (' <=50K') is the 'positive' class.
ad_tda_pc_5.50.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
# Print the confusion matrix once (the object was previously printed twice
# in a row, producing identical output).
ad_tda_pc_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3987 1415
## >50K 3429 937
##
## Accuracy : 0.5041
## 95% CI : (0.4941, 0.5141)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0495
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5376
## Specificity : 0.3984
## Pos Pred Value : 0.7381
## Neg Pred Value : 0.2146
## Prevalence : 0.7592
## Detection Rate : 0.4082
## Detection Prevalence : 0.5530
## Balanced Accuracy : 0.4680
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics stored in the node-2 confusion matrix object.
ad_tda_pc_5.50.5_n2_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.040950e-01 -4.951743e-02 4.941287e-01 5.140589e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 6.154697e-184
# Keep the test-set accuracy (a named length-1 vector) for the later comparison.
ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n2_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics for the 'positive' class.
ad_tda_pc_5.50.5_n2_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.5376214 0.3983844 0.7380600
## Neg Pred Value Precision Recall
## 0.2146129 0.7380600 0.5376214
## F1 Prevalence Detection Rate
## 0.6220939 0.7592138 0.4081695
## Detection Prevalence Balanced Accuracy
## 0.5530303 0.4680029
# Keep Precision, Recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_pc_5.50.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy gap: ad_nn1_fit_re minus the node-2 TDA model's resample accuracies.
# NOTE(review): the subtraction lines rows up by position — confirm both
# resample tables list the folds in the same order.
diff_tda_pca_5.50.5_nn1_n2_3_fold <- ad_nn1_fit_re - ad_tda_pc_5.50.5_n2_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n2_3_fold
## Accuracy
## 1 0.08409345
## 2 0.26521282
## 3 0.07274288
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (trailing -0.01 / 0.01 are presumably the ROPE bounds)
bst_tda_pca_5.50.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold), -0.01, 0.01
)
bst_tda_pca_5.50.5_nn1.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n2_3_fold[["probRight"]]
bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test, same input and bounds as the sign test
bsr_tda_pca_5.50.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.50.5_nn1.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0088
##
## $winRight
## [1] 0.9912
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) looks like the fold-correlation rho;
# 0.1 corresponds to 10-fold CV under the usual rho = 1/k heuristic, while these
# differences come from 3 folds — confirm whether 1/3 was intended.
bct_tda_pca_5.50.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_nn1.n2_3_fold
## $left
## [1] 0.08571311
##
## $rope
## [1] 0.01986901
##
## $right
## [1] 0.8944179
# Rope Plot of the fold-wise differences against the [-0.01, 0.01] range
plot(rope(diff_tda_pca_5.50.5_nn1_n2_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
#bf_tda_pca_5.50.5_nn1.n2_3_fold
# One-sample t-test on the three fold-wise accuracy differences
# (per the printed result: two-sided, null hypothesis mean = 0, df = 2).
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold)
## t = 2.2563, df = 2, p-value = 0.1527
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1275918 0.4089579
## sample estimates:
## mean of x
## 0.140683
### Test set diff
# Held-out accuracy gap: nn1_cf_ov_acc minus the node-2 TDA model's test accuracy.
diff_tda_pca_5.50.5_nn1.n2_test <- nn1_cf_ov_acc - ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n2_test
## Accuracy
## 0.2617731
## Bayesian Tests Test set diff
# NOTE(review): the input below is a single difference, so each test sees
# exactly one observation — confirm these results are meant to be interpreted.
# Bayesian Sign Test
bst_tda_pca_5.50.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test), -0.01, 0.01
)
bst_tda_pca_5.50.5_nn1.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_pca_5.50.5_nn1.n2_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n2_test[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n2_test[["probRight"]]
bst_tda_pca_5.50.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test), -0.01, 0.01
)
bsr_tda_pca_5.50.5_nn1.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1603667
##
## $winRight
## [1] 0.8396333
# Bayesian Correlated Test: prints NA for left/rope/right on this one-value
# input (presumably no spread can be estimated from a single difference).
bct_tda_pca_5.50.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_nn1.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n2_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n2_test))
#bf_tda_pca_5.50.5_nn1.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n2_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node3
# Neural Network 1: caret-tuned nnet fitted on the node-3 TDA subset.
# Resampling comes from fitControl (defined elsewhere); candidates are ranked
# by Accuracy. The optimiser trace for every candidate fit is printed below.
Adult_TDA_PC_5.50.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  method = 'nnet',
  trControl = fitControl,
  metric = 'Accuracy'
)
## # weights: 111
## initial value 5098.560007
## final value 4744.174576
## converged
## # weights: 331
## initial value 10279.912483
## iter 10 value 4702.006919
## iter 10 value 4702.006918
## iter 10 value 4702.006918
## final value 4702.006918
## converged
## # weights: 551
## initial value 6147.464370
## iter 10 value 4732.360697
## final value 4732.352720
## converged
## # weights: 111
## initial value 4775.759254
## iter 10 value 4744.249140
## final value 4744.248602
## converged
## # weights: 331
## initial value 7459.434280
## iter 10 value 4742.065875
## iter 20 value 4507.886953
## iter 30 value 4407.197729
## iter 40 value 4327.669257
## iter 50 value 4319.892598
## iter 60 value 4319.054971
## iter 70 value 4304.831330
## iter 80 value 4302.469138
## iter 90 value 4279.163303
## iter 100 value 4212.279949
## final value 4212.279949
## stopped after 100 iterations
## # weights: 551
## initial value 6660.420339
## iter 10 value 4586.284295
## iter 20 value 4481.259731
## iter 30 value 4476.572315
## final value 4475.593432
## converged
## # weights: 111
## initial value 7124.668910
## final value 4744.176417
## converged
## # weights: 331
## initial value 5876.750639
## final value 4744.180119
## converged
## # weights: 551
## initial value 6567.419229
## iter 10 value 4733.650317
## final value 4733.643072
## converged
## # weights: 111
## initial value 4861.507422
## iter 10 value 4699.291245
## iter 20 value 4622.018416
## iter 30 value 4577.678184
## iter 40 value 4569.768952
## final value 4569.758382
## converged
## # weights: 331
## initial value 9759.526518
## iter 10 value 4743.927264
## final value 4743.915183
## converged
## # weights: 551
## initial value 9608.505775
## iter 10 value 4729.186968
## iter 20 value 4729.135119
## iter 20 value 4729.135094
## iter 20 value 4729.135093
## final value 4729.135093
## converged
## # weights: 111
## initial value 5520.586253
## iter 10 value 4745.538870
## iter 10 value 4745.538829
## iter 10 value 4745.538802
## final value 4745.538802
## converged
## # weights: 331
## initial value 9744.835276
## iter 10 value 4746.578731
## iter 20 value 4488.131741
## iter 30 value 4460.042448
## iter 40 value 4429.883635
## iter 50 value 4400.286414
## iter 60 value 4390.982274
## iter 70 value 4307.548500
## iter 80 value 4162.284183
## iter 90 value 4037.347059
## iter 100 value 3978.169086
## final value 3978.169086
## stopped after 100 iterations
## # weights: 551
## initial value 8102.902366
## iter 10 value 4561.385980
## iter 20 value 4473.689575
## iter 30 value 4462.451517
## iter 40 value 4434.295631
## iter 50 value 4375.103599
## iter 60 value 4370.194879
## iter 70 value 4353.327675
## iter 80 value 4314.975737
## iter 90 value 4267.033014
## iter 100 value 4263.546381
## final value 4263.546381
## stopped after 100 iterations
## # weights: 111
## initial value 5705.745688
## iter 10 value 4743.930823
## final value 4743.924418
## converged
## # weights: 331
## initial value 14527.338073
## final value 4721.751065
## converged
## # weights: 551
## initial value 5098.383135
## iter 10 value 4733.603319
## final value 4733.597075
## converged
## # weights: 111
## initial value 7578.269479
## iter 10 value 4718.179799
## final value 4718.079635
## converged
## # weights: 331
## initial value 4897.274613
## final value 4680.851382
## converged
## # weights: 551
## initial value 7314.215011
## iter 10 value 4671.762280
## iter 20 value 4510.622311
## iter 30 value 4507.095786
## final value 4507.088580
## converged
## # weights: 111
## initial value 5733.004515
## iter 10 value 4744.063181
## iter 10 value 4744.063138
## final value 4744.063138
## converged
## # weights: 331
## initial value 5465.634747
## iter 10 value 4743.995910
## iter 20 value 4658.882993
## iter 30 value 4451.158372
## iter 40 value 4434.729615
## iter 50 value 4432.704549
## iter 60 value 4431.813387
## iter 70 value 4424.233026
## iter 80 value 4423.631000
## iter 90 value 4422.994570
## iter 90 value 4422.994538
## iter 90 value 4422.994538
## final value 4422.994538
## converged
## # weights: 551
## initial value 5369.740792
## iter 10 value 4737.722875
## iter 20 value 4712.693915
## iter 30 value 4470.303671
## iter 40 value 4448.827229
## iter 50 value 4401.422600
## iter 60 value 4324.161594
## iter 70 value 4305.408976
## iter 80 value 4278.118413
## iter 90 value 4265.178554
## iter 100 value 4148.410863
## final value 4148.410863
## stopped after 100 iterations
## # weights: 111
## initial value 6699.315874
## final value 4743.917018
## converged
## # weights: 331
## initial value 7469.061671
## final value 4743.921179
## converged
## # weights: 551
## initial value 6716.719896
## iter 10 value 4722.340581
## final value 4722.325140
## converged
## # weights: 551
## initial value 8764.205748
## iter 10 value 6939.221704
## iter 20 value 6832.458704
## iter 30 value 6261.688571
## iter 40 value 6188.278370
## iter 50 value 5996.458226
## iter 60 value 5952.761569
## iter 70 value 5714.106166
## iter 80 value 5590.169200
## iter 90 value 5478.799841
## iter 100 value 5437.122525
## final value 5437.122525
## stopped after 100 iterations
# Print the tuning summary: resampled Accuracy/Kappa for each (size, decay)
# candidate and the combination selected for the final model.
Adult_TDA_PC_5.50.5_n3_NN1Fit0
## Neural Network
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8827, 8826
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.7760575 0.034914825
## 1 1e-04 0.7715257 0.000510032
## 1 1e-01 0.7714502 0.000000000
## 3 0e+00 0.7754530 0.028337449
## 3 1e-04 0.7719789 0.003912412
## 3 1e-01 0.8022666 0.217339741
## 5 0e+00 0.7776427 0.051308166
## 5 1e-04 0.7728851 0.009644625
## 5 1e-01 0.8027944 0.223810663
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Per-fold Accuracy/Kappa of the selected tuning combination.
Adult_TDA_PC_5.50.5_n3_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8049388 0.2576969 Fold3
## 2 0.7967369 0.1862816 Fold1
## 3 0.8067075 0.2274535 Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the other classifier.
ad_tda_pc_5.50.5_n3_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n3_NN1Fit0[["resample"]]["Accuracy"]
# Print the final network's architecture and fitted weights.
summary(Adult_TDA_PC_5.50.5_n3_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.01 0.01 0.00 0.00 0.02 0.01 0.01 -0.01
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 -0.01 0.00 0.27 0.01 -0.01 -0.01 0.01
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## -0.01 -0.01 0.00 -0.01 0.01 0.00 -0.01 -0.01
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## -0.01 -0.01 0.00 0.00 -0.01 0.01 0.00 -0.01
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.01 0.00 0.00 0.00 0.01 0.02 0.01 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.01 0.01 -0.01 -0.01 0.00 0.00 0.00 0.01
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 -0.02 0.01 -0.01 -0.01 0.01 0.00 -0.01
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.02 0.02 0.01 0.00 0.01 0.02 0.01
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.06 0.00 0.00 0.00 0.00 -0.01 -0.02 0.02
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.01 -0.02 0.02 -0.01 -0.01 -0.02 -0.02 0.01
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## -0.01 0.01 0.01 0.01 0.00 0.01 -0.01 -0.01
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.01 0.00 0.01 -0.01 -0.01 0.02 0.00 0.02
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.01 0.00 0.01 -0.01 0.00 0.01 -0.01 0.01
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.01 -0.01 0.01 -0.01
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -0.01 0.01 -0.01 -0.01 -0.01 0.02 0.01 0.01
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.01 -0.01 0.00 0.00 -0.01 0.01 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.01 -0.01 0.01 -0.01 0.00 -0.01 0.00 0.01
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -0.01 0.02 0.00 -0.01 -0.01 0.02 0.00 -0.01
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.01 0.00 -0.02 0.01 0.01 0.01 0.01 -0.01
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 -0.01 0.01 0.01 -0.01 0.01 -0.01 -0.01
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.01 0.00 0.00 0.01 0.00 -0.01 0.01 0.02
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.02 0.00 0.02 0.00 -0.01 0.01 0.01 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.02 0.00 0.00 0.01 0.02 0.02 0.01
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.01 -0.01 0.01 -0.01 0.01 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.01 -0.01 0.00 0.00 0.02 0.01 0.01
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.01 0.02 -0.01 0.01 -0.01 0.01 -0.01 -0.01
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.01 -0.01 0.02 0.02 0.01 0.01 -0.02 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 -0.01 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.04 4.33 0.02 -0.02 0.00 0.01 0.03 0.01
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.04 0.01 -0.01 0.02 -0.01 -0.01 -0.05 0.01
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 -0.03 -0.01 -0.02 0.03 -0.10 0.01 0.05
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## -0.01 0.01 0.13 0.01 0.82 0.00 -0.02 0.14
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.01 -0.01 -0.09 0.01 0.02 0.00 -0.02 -0.04
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.04 0.04 0.01 -0.01 -0.02 0.02 -0.01 0.05
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.01 -0.02 -0.01 0.14 0.00 0.01 0.01 -0.11
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 -0.01 -0.03 0.01 0.06 -0.08 0.11
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## -0.57 0.00 -0.18 -0.02 0.01 0.02 0.02 0.01
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 -0.01 0.00 -0.01 -0.01 0.02 0.02 -0.02
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## -0.01 0.01 -0.01 0.02 0.01 0.01 0.00 -0.01
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## -0.01 -0.01 0.02 0.01 0.01 -0.01 0.02 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.01 0.01 -0.01 0.00 0.01 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.01 0.10 -0.02 0.02
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 1.28 -0.01 -0.69 -1.07 0.58 0.00 -1.01 1.49
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.94 0.38 0.60 0.00 0.06 -0.43 -1.45 0.45
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.21 1.60 -0.26 0.44 -0.01 0.85 -0.25 -0.21
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.94 0.61 -0.68 -0.65 -0.13 0.78 0.37 -0.53
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.20 0.23 0.49 -0.30 -0.67 -1.69 1.05 -0.10
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## -0.14 5.35 -0.27 -0.74 0.02 0.40 0.00 -0.32
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## -0.90 -1.33 0.59 2.12 0.06 1.19 1.01 -0.26
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## -2.88 0.53 0.00 -0.31 -0.09 1.14 -0.77 2.03
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 -0.01 -0.45 -2.11 0.10 2.02 1.54
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## -2.33 1.11 -0.01 0.15 -2.43 -0.43 -1.93 0.10
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.23 -0.86 0.02 0.00 1.18 0.46 1.96 0.95
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 -0.36 -1.56 -0.27 1.03 1.32 1.04 0.10
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## -0.38 -1.15 1.33 0.67 -0.92 -0.36 -0.53 1.08
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.13 0.65 -0.92 1.43 -0.40
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## -0.01 0.01 0.01 -0.01 -0.01 0.00 0.01 0.01
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## -0.01 0.00 0.00 0.03 0.00 0.00 0.02 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## -0.01 -0.02 -0.01 0.00 0.01 0.01 0.00 -0.01
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.01 0.00 0.01 -0.01 0.00 -0.01 0.02 0.01
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## -0.01 -0.01 -0.01 -0.01 0.01 0.00 -0.02 -0.01
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.01 0.00 0.02 -0.01 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.01 0.00 0.00 0.01 0.01 0.01 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## -0.01 0.01 0.01 0.00 -0.01 -0.02 -0.02 0.01
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 -0.01 0.00 -0.01 -0.01 0.01 0.01 0.02
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## -0.01 0.00 -0.01 0.00 -0.02 0.01 0.00 -0.01
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.02 -0.02 -0.02 0.00 -0.01
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.01 -0.01 0.02 0.00 -0.02 -0.01 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## -0.02 -0.01 0.00 0.00 -0.01 -0.01 -0.01 0.02
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.02 0.01 0.01
## b->o h1->o h2->o h3->o h4->o h5->o
## 0.56 0.54 0.51 -1.56 -3.39 0.52
# Score the test data with the node-3 NN1 model (fitted earlier in the file).
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predicted against observed class labels of the test data,
# store the confusion-matrix object, and print its summary.
ad_tda_pc_5.50.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4912 1711
## >50K 2504 641
##
## Accuracy : 0.5685
## 95% CI : (0.5586, 0.5783)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0584
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6624
## Specificity : 0.2725
## Pos Pred Value : 0.7417
## Neg Pred Value : 0.2038
## Prevalence : 0.7592
## Detection Rate : 0.5029
## Detection Prevalence : 0.6780
## Balanced Accuracy : 0.4674
##
## 'Positive' Class : <=50K
##
# Print the same confusion-matrix object a second time; the output that
# follows repeats the summary already shown above.
ad_tda_pc_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4912 1711
## >50K 2504 641
##
## Accuracy : 0.5685
## 95% CI : (0.5586, 0.5783)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0584
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6624
## Specificity : 0.2725
## Pos Pred Value : 0.7417
## Neg Pred Value : 0.2038
## Prevalence : 0.7592
## Detection Rate : 0.5029
## Detection Prevalence : 0.6780
## Balanced Accuracy : 0.4674
##
## 'Positive' Class : <=50K
##
# Show the overall test-set statistics stored in the confusion-matrix object.
ad_tda_pc_5.50.5_n3_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.684889e-01 -5.839249e-02 5.585965e-01 5.783405e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 3.144301e-34
# Keep the test-set accuracy (the first entry of `overall`) for the
# test-set comparison further down.
ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_nn1_cf0[["overall"]]["Accuracy"]
# Show the per-class statistics.
ad_tda_pc_5.50.5_n3_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.6623517 0.2725340 0.7416579
## Neg Pred Value Precision Recall
## 0.2038156 0.7416579 0.6623517
## F1 Prevalence Detection Rate
## 0.6997649 0.7592138 0.5028665
## Detection Prevalence Balanced Accuracy
## 0.6780303 0.4674428
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`).
ad_tda_pc_5.50.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN1 vs. tda-assisted NN1 (neural network) classifiers
### 3-fold diff
# Row-wise accuracy difference: ad_nn1_fit_re minus the node-3 TDA fit.
# A negative row means the node-3 TDA fit scored higher on that row.
# NOTE(review): rows are paired by position, not by fold label -- confirm that
# both resample tables list the folds in the same order.
diff_tda_pca_5.50.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n3_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n3_3_fold
## Accuracy
## 1 -0.01238914
## 2 0.01308146
## 3 -0.01044528
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the per-fold differences. The bounds -0.01 and 0.01
# are presumably the ROPE limits -- confirm against the BayesianSignTest
# definition.
bst_tda_pca_5.50.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability.
bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n3_3_fold[["probRight"]]
bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left
## [1] 2
# Bayesian Signed Rank Test on the per-fold differences, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0.3597
##
## $winRope
## [1] 0.5833333
##
## $winRight
## [1] 0.05696667
# Bayesian Correlated Test on the per-fold differences.
# NOTE(review): the second argument is 0.1. If it is the cross-validation
# correlation (rho) of the correlated t-test, 3-fold resampling would normally
# call for 1/3 -- confirm against the correlatedBayesianTtest definition.
bct_tda_pca_5.50.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.2746427
##
## $rope
## [1] 0.5773687
##
## $right
## [1] 0.1479887
# Rope Plot: plot the rope() result for the per-fold differences with the
# interval (-0.01, 0.01).
plot(
  rope(
    diff_tda_pca_5.50.5_nn1_n3_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold))
#bf_tda_pca_5.50.5_nn1.n3_3_fold
# One-sample t-test of the per-fold differences against a mean of zero.
t.test(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold)
## t = -0.39717, df = 2, p-value = 0.7296
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03847027 0.03196829
## sample estimates:
## mean of x
## -0.003250987
### Test set diff
# Difference in test-set accuracy: nn1_cf_ov_acc minus the node-3 TDA model's
# accuracy. This is a single value, not a per-fold vector.
diff_tda_pca_5.50.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n3_test
## Accuracy
## 0.1973792
## Bayesian Tests Test set diff
# Bayesian Sign Test on the test-set difference.
# NOTE(review): the input is a single difference, so the probabilities below
# rest on one observation -- confirm this use is intended.
bst_tda_pca_5.50.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability
# (probLeft is 0 in the printout above, so the ratio is 0).
bst_tda_pca_5.50.5_nn1.n3_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n3_test[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n3_test[["probRight"]]
bst_tda_pca_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference, using the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1615
##
## $winRight
## [1] 0.8385
# Bayesian Correlated Test on the single test-set difference.
# NOTE(review): the result prints as NA for left, rope and right. Presumably
# a spread cannot be estimated from one value -- confirm against the
# correlatedBayesianTtest definition and consider dropping this call.
bct_tda_pca_5.50.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n3_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n3_test))
#bf_tda_pca_5.50.5_nn1.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test))
##Node4
# Neural Network 1: tune an nnet classifier on the node-4 TDA data with the
# shared `fitControl` resampling settings, selecting the model by accuracy.
# NOTE(review): no set.seed() is visible next to this call, and several fits
# in the log below stop at the 100-iteration limit -- confirm reproducibility
# and convergence are acceptable.
Adult_TDA_PC_5.50.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 10215.062025
## final value 2373.078436
## converged
## # weights: 331
## initial value 8912.050726
## final value 2366.566547
## converged
## # weights: 551
## initial value 12960.931372
## iter 10 value 2400.881216
## final value 2344.220816
## converged
## # weights: 111
## initial value 8102.117664
## iter 10 value 2373.482839
## final value 2373.482405
## converged
## # weights: 331
## initial value 8995.269604
## iter 10 value 2369.379529
## iter 20 value 2366.744281
## iter 30 value 2353.317434
## iter 40 value 2270.525432
## iter 50 value 2177.641070
## iter 60 value 2173.809183
## iter 70 value 2161.971961
## iter 80 value 2127.497955
## iter 90 value 2085.234347
## iter 100 value 2053.745643
## final value 2053.745643
## stopped after 100 iterations
## # weights: 551
## initial value 11083.858508
## iter 10 value 2405.629299
## iter 20 value 2377.031029
## iter 30 value 2373.345990
## iter 40 value 2366.816854
## iter 50 value 2068.084212
## iter 60 value 1938.130453
## iter 70 value 1887.453259
## iter 80 value 1856.430397
## iter 90 value 1855.091535
## iter 100 value 1850.825584
## final value 1850.825584
## stopped after 100 iterations
## # weights: 111
## initial value 6560.353151
## iter 10 value 2366.618808
## final value 2366.570656
## converged
## # weights: 331
## initial value 6868.961704
## final value 2373.084225
## converged
## # weights: 551
## initial value 12544.544610
## final value 2373.087769
## converged
## # weights: 111
## initial value 5231.486883
## final value 2375.976970
## converged
## # weights: 331
## initial value 4165.172804
## final value 2375.976970
## converged
## # weights: 551
## initial value 10210.941720
## final value 2375.976970
## converged
## # weights: 111
## initial value 10782.923774
## iter 10 value 2376.095327
## iter 20 value 2170.549088
## iter 30 value 2163.415262
## iter 40 value 2098.088894
## iter 50 value 2047.029244
## iter 60 value 2039.047454
## iter 70 value 2026.992621
## iter 80 value 1943.058856
## iter 90 value 1822.527048
## iter 100 value 1764.802770
## final value 1764.802770
## stopped after 100 iterations
## # weights: 331
## initial value 5335.257141
## iter 10 value 2358.226755
## iter 20 value 2302.549006
## iter 30 value 2172.068936
## iter 40 value 2093.786832
## iter 50 value 2056.355211
## iter 60 value 1996.335934
## iter 70 value 1820.762875
## iter 80 value 1743.590981
## iter 90 value 1677.160852
## iter 100 value 1626.653533
## final value 1626.653533
## stopped after 100 iterations
## # weights: 551
## initial value 3724.457301
## iter 10 value 2363.486088
## iter 20 value 2329.618202
## iter 30 value 2155.160494
## iter 40 value 2127.599779
## iter 50 value 2088.594764
## iter 60 value 2062.393379
## iter 70 value 2047.273018
## iter 80 value 2046.836162
## iter 90 value 2005.656000
## iter 100 value 1897.702858
## final value 1897.702858
## stopped after 100 iterations
## # weights: 111
## initial value 9745.396271
## final value 2358.051294
## converged
## # weights: 331
## initial value 7844.331542
## final value 2375.982646
## converged
## # weights: 551
## initial value 9724.329122
## final value 2375.985595
## converged
## # weights: 111
## initial value 9164.023050
## final value 2373.078436
## converged
## # weights: 331
## initial value 12173.105910
## final value 2373.078436
## converged
## # weights: 551
## initial value 7790.741394
## final value 2373.078436
## converged
## # weights: 111
## initial value 8883.851332
## iter 10 value 2404.943138
## iter 20 value 2361.093377
## final value 2361.084924
## converged
## # weights: 331
## initial value 15993.137153
## iter 10 value 2373.485295
## iter 20 value 2373.335210
## final value 2373.281742
## converged
## # weights: 551
## initial value 4975.138154
## iter 10 value 2367.758402
## iter 20 value 2167.417966
## iter 30 value 2160.747277
## iter 40 value 2143.745047
## iter 50 value 2141.970933
## iter 60 value 2141.048868
## final value 2140.000566
## converged
## # weights: 111
## initial value 9009.415585
## iter 10 value 2373.081116
## iter 10 value 2373.081115
## iter 10 value 2373.081115
## final value 2373.081115
## converged
## # weights: 331
## initial value 11724.579916
## final value 2373.084145
## converged
## # weights: 551
## initial value 5969.530058
## final value 2373.088183
## converged
## # weights: 551
## initial value 8607.860202
## iter 10 value 3561.875666
## iter 20 value 3561.808840
## iter 30 value 3372.830025
## iter 40 value 3278.909837
## iter 50 value 3248.538016
## iter 60 value 3156.559620
## iter 70 value 3111.394436
## iter 80 value 3106.867094
## iter 90 value 3076.618386
## iter 100 value 3018.374358
## final value 3018.374358
## stopped after 100 iterations
# Print the tuning summary of the node-4 model: resampled accuracy and kappa
# for each size/decay combination, and the combination that was selected.
Adult_TDA_PC_5.50.5_n4_NN1Fit0
## Neural Network
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11133, 11134, 11133
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.9449102 0.000000000
## 1 1e-04 0.9453294 0.019603585
## 1 1e-01 0.9464074 0.100201043
## 3 0e+00 0.9450299 0.004079951
## 3 1e-04 0.9449102 0.000000000
## 3 1e-01 0.9490420 0.191372598
## 5 0e+00 0.9449701 0.009531380
## 5 1e-04 0.9449102 0.000000000
## 5 1e-01 0.9491017 0.250196953
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Show the per-fold accuracy and kappa of the selected model.
Adult_TDA_PC_5.50.5_n4_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9486258 0.1716326 Fold3
## 2 0.9507814 0.3033336 Fold1
## 3 0.9478980 0.2756247 Fold2
# Keep only the Accuracy column (as a one-column data frame) for the 3-fold
# comparison below.
# NOTE(review): the rows above are listed as Fold3, Fold1, Fold2, and the
# later difference pairs rows by position -- confirm the ordering matches
# ad_nn1_fit_re.
ad_tda_pc_5.50.5_n4_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n4_NN1Fit0[["resample"]]["Accuracy"]
# Print the final network's architecture, fitting options and weights.
summary(Adult_TDA_PC_5.50.5_n4_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.05 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.04 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 1.62 0.00 0.08 -0.10 0.51 0.01 0.52 -0.05
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.45 0.19 0.01 0.00 0.13 0.81 0.52 0.09
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.48 0.23 0.05 0.07 0.66 -1.14 -0.26 -0.41
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -0.13 0.16 -0.21 0.58 -0.27 0.18 -0.03 -0.68
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.38 1.13 0.89 -0.25 0.09 -0.09 0.00 0.37
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.48 0.15 0.21 0.54 -0.23 0.14 0.48 -0.27
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.16 -0.26 0.82 0.86 0.09 0.00 0.91 0.33
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.57 0.40 0.56 -0.15 0.20 0.62 -0.12 1.75
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.01 -0.24 0.12 0.05 -0.04 0.04
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.05 -0.04 0.01 0.06 0.23 -0.16 0.10 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 -0.10 -0.03 0.00 0.03 -0.08 -0.04 0.01
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.07 -0.04 -0.08 -0.31 -0.01 -0.10 -0.01 -0.01
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.02 0.05 -0.04 0.53 -0.29 0.16 0.04 0.02
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.11 0.01 1.61 -0.02 -0.07
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.01 0.56 0.00 -0.01 0.02 0.00 -0.01 0.01
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.03 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 -0.01 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## -0.01 0.00 0.00 0.03 0.06 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.01
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## -0.01 0.00 0.00 0.00 0.00 -0.01 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.01 0.00 0.01 0.00 0.00 0.01 0.00 -0.01
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.01
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.09 -0.07 0.72 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.01 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.00 -0.08 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 0.00 -0.02 0.00 0.00 0.00
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## -0.25 0.04 -0.07 0.00 0.00 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 0.00 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.08 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.01 0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o
## 0.23 0.23 -5.41 -0.29 1.68 -0.85
# Score the test data with the node-4 NN1 model fitted above.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predicted against observed class labels of the test data,
# store the confusion-matrix object, and print its summary.
ad_tda_pc_5.50.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7340 1979
## >50K 76 373
##
## Accuracy : 0.7896
## 95% CI : (0.7814, 0.7977)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 5.314e-13
##
## Kappa : 0.205
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9898
## Specificity : 0.1586
## Pos Pred Value : 0.7876
## Neg Pred Value : 0.8307
## Prevalence : 0.7592
## Detection Rate : 0.7514
## Detection Prevalence : 0.9540
## Balanced Accuracy : 0.5742
##
## 'Positive' Class : <=50K
##
# Print the same confusion-matrix object a second time; the output that
# follows repeats the summary already shown above.
ad_tda_pc_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7340 1979
## >50K 76 373
##
## Accuracy : 0.7896
## 95% CI : (0.7814, 0.7977)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 5.314e-13
##
## Kappa : 0.205
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9898
## Specificity : 0.1586
## Pos Pred Value : 0.7876
## Neg Pred Value : 0.8307
## Prevalence : 0.7592
## Detection Rate : 0.7514
## Detection Prevalence : 0.9540
## Balanced Accuracy : 0.5742
##
## 'Positive' Class : <=50K
##
# Show the overall test-set statistics stored in the confusion-matrix object.
ad_tda_pc_5.50.5_n4_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.896192e-01 2.049595e-01 7.813995e-01 7.976658e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.314162e-13 0.000000e+00
# Keep the test-set accuracy (the first entry of `overall`) for the
# test-set comparison further down.
ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n4_nn1_cf0[["overall"]]["Accuracy"]
# Show the per-class statistics.
ad_tda_pc_5.50.5_n4_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9897519 0.1585884 0.7876382
## Neg Pred Value Precision Recall
## 0.8307350 0.7876382 0.9897519
## F1 Prevalence Detection Rate
## 0.8772035 0.7592138 0.7514333
## Detection Prevalence Balanced Accuracy
## 0.9540336 0.5741702
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`).
ad_tda_pc_5.50.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN1 vs. tda-assisted NN1 (neural network) classifiers
### 3-fold diff
# Row-wise accuracy difference: ad_nn1_fit_re minus the node-4 TDA fit.
# A negative row means the node-4 TDA fit scored higher on that row.
# NOTE(review): rows are paired by position, not by fold label, and the
# node-4 resample table is printed as Fold3, Fold1, Fold2 -- confirm that
# ad_nn1_fit_re uses the same order.
diff_tda_pca_5.50.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n4_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n4_3_fold
## Accuracy
## 1 -0.1560761
## 2 -0.1409630
## 3 -0.1516358
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the per-fold differences. The bounds -0.01 and 0.01
# are presumably the ROPE limits -- confirm against the BayesianSignTest
# definition.
bst_tda_pca_5.50.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability.
# probRight is 0 in the printout above, so this division evaluates to Inf.
bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n4_3_fold[["probRight"]]
bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the per-fold differences, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9918
##
## $winRope
## [1] 0.0082
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the per-fold differences.
# NOTE(review): the second argument is 0.1. If it is the cross-validation
# correlation (rho) of the correlated t-test, 3-fold resampling would normally
# call for 1/3 -- confirm against the correlatedBayesianTtest definition.
bct_tda_pca_5.50.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.999313
##
## $rope
## [1] 0.0001611837
##
## $right
## [1] 0.000525845
# Rope Plot: plot the rope() result for the per-fold differences with the
# interval (-0.01, 0.01).
plot(
  rope(
    diff_tda_pca_5.50.5_nn1_n4_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold))
#bf_tda_pca_5.50.5_nn1.n4_3_fold
# One-sample t-test of the per-fold differences against a mean of zero.
t.test(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold)
## t = -33.348, df = 2, p-value = 0.000898
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1688546 -0.1302621
## sample estimates:
## mean of x
## -0.1495583
### Test set diff
# Difference in test-set accuracy: nn1_cf_ov_acc minus the node-4 TDA model's
# accuracy. This is a single value, not a per-fold vector.
diff_tda_pca_5.50.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n4_test
## Accuracy
## -0.02375102
## Bayesian Tests Test set diff
# Bayesian Sign Test on the test-set difference.
# NOTE(review): the input is a single difference, so the probabilities below
# rest on one observation -- confirm this use is intended.
bst_tda_pca_5.50.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n4_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability.
# probRight is 0 in the printout above, so this division evaluates to Inf.
bst_tda_pca_5.50.5_nn1.n4_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n4_test[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n4_test[["probRight"]]
bst_tda_pca_5.50.5_nn1.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the single test-set difference, using the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n4_test
## $winLeft
## [1] 0.8415333
##
## $winRope
## [1] 0.1584667
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the single test-set difference.
# NOTE(review): the result prints as NA for left, rope and right. Presumably
# a spread cannot be estimated from one value -- confirm against the
# correlatedBayesianTtest definition and consider dropping this call.
bct_tda_pca_5.50.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n4_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n4_test))
#bf_tda_pca_5.50.5_nn1.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n4_test))
##Node5
# Neural Network 1: tune an nnet classifier on the node-5 TDA data with the
# shared `fitControl` resampling settings, selecting the model by accuracy.
# NOTE(review): no set.seed() is visible next to this call -- confirm the
# resampling and starting weights are reproducible.
Adult_TDA_PC_5.50.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 8140.345479
## final value 3684.136149
## converged
## # weights: 331
## initial value 6995.676901
## final value 3684.136149
## converged
## # weights: 551
## initial value 16142.839292
## final value 3684.136149
## converged
## # weights: 111
## initial value 5273.940835
## iter 10 value 166.040071
## iter 20 value 141.482625
## iter 30 value 129.869578
## iter 40 value 127.672163
## iter 50 value 127.592083
## iter 60 value 127.582218
## iter 70 value 127.581954
## iter 80 value 127.581491
## final value 127.581486
## converged
## # weights: 331
## initial value 5104.007497
## iter 10 value 344.683942
## iter 20 value 158.553864
## iter 30 value 144.729055
## iter 40 value 142.163156
## iter 50 value 141.888125
## iter 60 value 141.884665
## final value 141.884654
## converged
## # weights: 551
## initial value 4065.969904
## iter 10 value 279.814072
## iter 20 value 158.080398
## iter 30 value 144.442621
## iter 40 value 144.415600
## iter 50 value 144.227084
## iter 60 value 138.602899
## iter 70 value 132.219208
## iter 80 value 132.215246
## iter 90 value 132.208216
## iter 100 value 132.199930
## final value 132.199930
## stopped after 100 iterations
## # weights: 111
## initial value 4863.301460
## iter 10 value 3684.161125
## final value 143.468803
## converged
## # weights: 331
## initial value 3871.763688
## iter 10 value 168.167637
## iter 20 value 143.468733
## iter 30 value 143.465361
## iter 40 value 143.460184
## final value 143.460041
## converged
## # weights: 551
## initial value 1810.874698
## iter 10 value 143.473342
## iter 20 value 143.464590
## final value 143.464585
## converged
## # weights: 111
## initial value 5366.555604
## final value 3499.929341
## converged
## # weights: 331
## initial value 9109.383314
## final value 3499.929341
## converged
## # weights: 551
## initial value 5006.613833
## final value 3499.929341
## converged
## # weights: 111
## initial value 6029.750183
## iter 10 value 209.135200
## iter 20 value 141.551286
## iter 30 value 141.095911
## final value 141.095025
## converged
## # weights: 331
## initial value 9410.118111
## iter 10 value 657.145317
## iter 20 value 341.454146
## iter 30 value 324.024718
## iter 40 value 141.097836
## iter 50 value 141.097172
## iter 50 value 141.097172
## iter 60 value 139.178491
## iter 70 value 128.168223
## iter 80 value 120.280241
## final value 120.279477
## converged
## # weights: 551
## initial value 4188.124093
## iter 10 value 335.069541
## iter 20 value 256.282246
## iter 30 value 216.991302
## iter 40 value 180.439321
## iter 50 value 175.641101
## iter 60 value 131.184524
## iter 70 value 106.424319
## iter 80 value 105.710188
## iter 90 value 104.890573
## iter 100 value 93.035373
## final value 93.035373
## stopped after 100 iterations
## # weights: 111
## initial value 9749.368625
## iter 10 value 3499.973651
## final value 137.267351
## converged
## # weights: 331
## initial value 8361.939043
## iter 10 value 138.908253
## iter 20 value 137.266538
## iter 30 value 137.262973
## final value 137.262947
## converged
## # weights: 551
## initial value 9272.388676
## iter 10 value 3499.980225
## iter 20 value 137.270897
## iter 30 value 137.262677
## final value 137.262624
## converged
## # weights: 111
## initial value 4893.252542
## final value 3499.929341
## converged
## # weights: 331
## initial value 4306.593417
## final value 3499.929341
## converged
## # weights: 551
## initial value 7518.358577
## final value 3499.929341
## converged
## # weights: 111
## initial value 6847.393971
## iter 10 value 298.550134
## iter 20 value 141.712836
## iter 30 value 139.692939
## iter 40 value 139.191466
## iter 50 value 139.188095
## final value 139.188085
## converged
## # weights: 331
## initial value 7290.372493
## iter 10 value 196.396755
## iter 20 value 139.837125
## iter 30 value 139.195614
## iter 40 value 139.186471
## iter 50 value 138.541517
## iter 60 value 127.397522
## iter 70 value 122.548703
## iter 80 value 122.452490
## iter 90 value 122.028228
## iter 100 value 121.671367
## final value 121.671367
## stopped after 100 iterations
## # weights: 551
## initial value 2545.235785
## iter 10 value 140.142632
## iter 20 value 139.144478
## iter 30 value 138.045749
## iter 40 value 138.028370
## iter 50 value 138.018536
## iter 60 value 137.986209
## iter 70 value 137.906716
## iter 80 value 137.906600
## iter 90 value 137.906253
## iter 100 value 137.906098
## final value 137.906098
## stopped after 100 iterations
## # weights: 111
## initial value 4942.055139
## iter 10 value 3500.312217
## final value 137.267121
## converged
## # weights: 331
## initial value 8756.743013
## iter 10 value 154.377204
## iter 20 value 137.268065
## iter 30 value 137.263659
## final value 137.263594
## converged
## # weights: 551
## initial value 3260.654554
## iter 10 value 137.273845
## iter 20 value 137.268184
## iter 30 value 137.207772
## iter 40 value 131.464655
## iter 50 value 128.101129
## iter 60 value 124.182892
## iter 70 value 122.693824
## final value 122.692758
## converged
## # weights: 111
## initial value 4639.640062
## iter 10 value 216.354776
## final value 212.827108
## converged
# Echo the fitted model: resampling results per size/decay combination and
# the final values chosen by Accuracy.
Adult_TDA_PC_5.50.5_n5_NN1Fit0
## Neural Network
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9604, 9602, 9602
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.9979867 0.0000000
## 1 1e-04 0.9979867 0.0000000
## 1 1e-01 0.9979867 0.0000000
## 3 0e+00 0.9979867 0.0000000
## 3 1e-04 0.9979867 0.0000000
## 3 1e-01 0.9979867 0.0000000
## 5 0e+00 0.9979867 0.0000000
## 5 1e-04 0.9979867 0.0000000
## 5 1e-01 0.9979173 0.0510107
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 1 and decay = 0.1.
# Per-fold resampling metrics of the selected model
Adult_TDA_PC_5.50.5_n5_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9981250 0 Fold1
## 2 0.9979175 0 Fold2
## 3 0.9979175 0 Fold3
# Keep only the per-fold Accuracy column (result stays a one-column data frame)
ad_tda_pc_5.50.5_n5_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n5_NN1Fit0$resample["Accuracy"]
# Print the architecture and weights of the final network
summary(Adult_TDA_PC_5.50.5_n5_NN1Fit0)
## a 108-1-1 network with 111 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o
## -6.16 0.00
# Score the held-out test set with the node-5 TDA/PCA neural network
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels (adult_df1 coerced to
# a factor), store the confusion matrix, and echo it
ad_tda_pc_5.50.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Echo the same confusion-matrix object a second time (the recorded output is
# identical to the print directly above).
ad_tda_pc_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the confusion matrix
ad_tda_pc_5.50.5_n5_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the overall accuracy (first entry of $overall) as a named scalar
ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_nn1_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.50.5_n5_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.50.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN1 vs. tda-assisted NN1 classifiers
### 3-fold difference: ad_nn1_fit_re minus the per-fold accuracies of the
### TDA-PCA node-5 NN1 model (one-column data frames)
diff_tda_pca_5.50.5_nn1_n5_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n5_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n5_3_fold
## Accuracy
## 1 -0.2055753
## 2 -0.1880992
## 3 -0.2016554
## Bayesian tests on the 3-fold accuracy differences
# Bayesian sign test; -0.01 and 0.01 are the 2nd and 3rd positional arguments
# (presumably the ROPE bounds -- the helper is defined outside this section).
bst_tda_pca_5.50.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right" (Inf when probRight is 0)
bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same two bounds
bsr_tda_pca_5.50.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9914667
##
## $winRope
## [1] 0.008533333
##
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is the 2nd positional argument (presumably
# the correlation -- confirm against the helper's definition)
bct_tda_pca_5.50.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.9994746
##
## $rope
## [1] 9.586431e-05
##
## $right
## [1] 0.0004295431
# ROPE plot: rope() is evaluated on the 3-fold accuracy differences with the
# range c(-0.01, 0.01) and its result is plotted.
plot(
  rope(diff_tda_pca_5.50.5_nn1_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (left disabled)
#bf_tda_pca_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
#bf_tda_pca_5.50.5_nn1.n5_3_fold
# One-sample t-test of the fold-wise differences (recorded output tests
# against a true mean of 0)
t.test(x = as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold)
## t = -37.482, df = 2, p-value = 0.000711
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2212232 -0.1756633
## sample estimates:
## mean of x
## -0.1984433
### Test-set difference in overall accuracy:
### nn1_cf_ov_acc minus the TDA-PCA node-5 NN1 accuracy
diff_tda_pca_5.50.5_nn1.n5_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n5_test
## Accuracy
## 0.006654382
## Bayesian tests on the test-set accuracy difference
# NOTE(review): the input is a single difference value, so every test below
# runs on one observation.
# Bayesian sign test; -0.01 and 0.01 are the 2nd and 3rd positional arguments
# (presumably the ROPE bounds -- the helper is defined outside this section).
bst_tda_pca_5.50.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right"; both are 0 in the recorded run, so
# the ratio is 0/0 = NaN
bst_tda_pca_5.50.5_nn1.n5_test_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n5_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n5_test_odds.left
## [1] NaN
# Bayesian signed-rank test with the same two bounds
bsr_tda_pca_5.50.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is the 2nd positional argument (presumably
# the correlation -- confirm against the helper's definition)
bct_tda_pca_5.50.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n5_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n5_test))
#bf_tda_pca_5.50.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test))
## With TDA KDE filter: 5 intervals, 50% overlap, 5 bins
## Node 1
# Neural Network 1: fit an "nnet" model via train() on the node-1 TDA/KDE
# training vectors. The response is adult_df1 coerced to a factor; every other
# column is a predictor. Resampling is controlled by fitControl (defined
# elsewhere) and tuning is scored on Accuracy.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 5526.698305
## final value 5106.371841
## converged
## # weights: 331
## initial value 5248.781173
## iter 10 value 5098.269886
## final value 5098.264916
## converged
## # weights: 551
## initial value 7455.173102
## iter 10 value 5092.857774
## final value 5092.853842
## converged
## # weights: 111
## initial value 6443.035962
## iter 10 value 5106.427022
## iter 10 value 5106.427011
## iter 10 value 5106.427011
## final value 5106.427011
## converged
## # weights: 331
## initial value 5912.284564
## iter 10 value 4900.323691
## iter 20 value 4666.332674
## iter 30 value 4609.708910
## iter 40 value 4554.979252
## iter 50 value 4536.200380
## iter 60 value 4427.575758
## iter 70 value 4380.027565
## iter 80 value 4369.771840
## final value 4369.425596
## converged
## # weights: 551
## initial value 5765.977215
## iter 10 value 4984.034887
## iter 20 value 4975.288246
## iter 30 value 4889.093784
## iter 40 value 4795.791603
## iter 50 value 4445.370098
## iter 60 value 4411.188085
## iter 70 value 4407.932530
## iter 80 value 4378.478831
## iter 90 value 3771.155084
## iter 100 value 3253.929669
## final value 3253.929669
## stopped after 100 iterations
## # weights: 111
## initial value 6893.521399
## iter 10 value 5105.154162
## final value 5105.136147
## converged
## # weights: 331
## initial value 5688.616772
## iter 10 value 5098.281703
## final value 5098.280574
## converged
## # weights: 551
## initial value 10018.499202
## iter 10 value 5086.126392
## final value 5086.108199
## converged
## # weights: 111
## initial value 5636.473977
## iter 10 value 5095.861179
## final value 5095.859120
## converged
## # weights: 331
## initial value 6699.734921
## iter 10 value 5098.591097
## final value 5098.564222
## converged
## # weights: 551
## initial value 5294.539914
## iter 10 value 5015.483885
## final value 5015.473289
## converged
## # weights: 111
## initial value 6861.999373
## iter 10 value 5108.080546
## final value 5108.077114
## converged
## # weights: 331
## initial value 7555.132131
## iter 10 value 5108.062099
## iter 20 value 5097.497667
## iter 30 value 4663.353197
## iter 40 value 4646.782252
## iter 50 value 4540.362684
## iter 60 value 4477.124651
## iter 70 value 4440.796368
## iter 80 value 4436.604599
## iter 90 value 4436.101803
## iter 100 value 4416.394894
## final value 4416.394894
## stopped after 100 iterations
## # weights: 551
## initial value 5113.853322
## iter 10 value 5094.648420
## iter 20 value 4696.882502
## iter 30 value 4612.188554
## iter 40 value 4597.323036
## iter 50 value 4521.838321
## iter 60 value 4486.918271
## iter 70 value 4332.145529
## iter 80 value 4296.410882
## iter 90 value 4024.071819
## iter 100 value 3888.071389
## final value 3888.071389
## stopped after 100 iterations
## # weights: 111
## initial value 5089.661545
## final value 5023.733875
## converged
## # weights: 331
## initial value 6369.146671
## iter 10 value 5097.241225
## final value 5097.234305
## converged
## # weights: 551
## initial value 5438.701803
## final value 5087.762402
## converged
## # weights: 111
## initial value 5406.082632
## iter 10 value 5069.842041
## final value 5069.841978
## converged
## # weights: 331
## initial value 5891.953652
## iter 10 value 5098.274196
## final value 5098.264913
## converged
## # weights: 551
## initial value 5079.150774
## final value 4973.467272
## converged
## # weights: 111
## initial value 5456.756104
## iter 10 value 5106.461195
## final value 5106.428428
## converged
## # weights: 331
## initial value 5638.489381
## iter 10 value 5085.600093
## iter 20 value 4753.374219
## iter 30 value 4549.825798
## iter 40 value 4498.514602
## iter 50 value 4454.672100
## iter 60 value 4313.062623
## iter 70 value 4056.540260
## iter 80 value 3620.227700
## iter 90 value 3442.961651
## iter 100 value 3354.111328
## final value 3354.111328
## stopped after 100 iterations
## # weights: 551
## initial value 5850.303955
## iter 10 value 4963.364954
## iter 20 value 4913.600479
## iter 30 value 4726.232618
## iter 40 value 4697.743133
## iter 50 value 4654.011990
## iter 60 value 4596.259691
## iter 70 value 4572.060065
## iter 80 value 4436.110707
## iter 90 value 4404.714496
## iter 100 value 4375.445317
## final value 4375.445317
## stopped after 100 iterations
## # weights: 111
## initial value 5229.182132
## final value 5106.373518
## converged
## # weights: 331
## initial value 5953.190145
## iter 10 value 5028.164818
## iter 20 value 5026.861945
## final value 5026.861499
## converged
## # weights: 551
## initial value 8041.185505
## final value 5067.122600
## converged
## # weights: 551
## initial value 10963.089329
## iter 10 value 7498.318795
## iter 20 value 7054.542786
## iter 30 value 6813.553502
## iter 40 value 6790.106568
## iter 50 value 6784.170997
## iter 60 value 6779.054410
## iter 70 value 6776.057442
## iter 80 value 6744.074435
## iter 90 value 6572.303336
## iter 100 value 6364.221561
## final value 6364.221561
## stopped after 100 iterations
# Echo the fitted model: resampling results per size/decay combination and
# the final values chosen by Accuracy.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0
## Neural Network
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8924, 8926, 8924
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.7422126 0.008078572
## 1 1e-04 0.7436327 0.016255750
## 1 1e-01 0.7407933 0.000000000
## 3 0e+00 0.7414657 0.003837379
## 3 1e-04 0.7431088 0.013134662
## 3 1e-01 0.8012242 0.407262917
## 5 0e+00 0.7481888 0.042258084
## 5 1e-04 0.7431838 0.013601123
## 5 1e-01 0.8110834 0.422526001
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Per-fold resampling metrics of the selected model
# NOTE(review): the recorded rows are ordered Fold3, Fold1, Fold2. The 3-fold
# difference computed later appears to pair rows by position, so confirm that
# ad_nn1_fit_re uses the same fold order before interpreting the paired tests.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7985660 0.3541570 Fold3
## 2 0.8371051 0.6044316 Fold1
## 3 0.7975790 0.3089894 Fold2
# Keep only the per-fold Accuracy column (result stays a one-column data frame)
ad_tda_kde_5.50.5_n1_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n1_NN1Fit0$resample["Accuracy"]
# Print the architecture and weights of the final network
summary(Adult_TDA_KDE_5.50.5_n1_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.29 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## -0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.32 2.23 0.24 0.06 0.05 0.00 0.15 -0.10
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## -0.09 0.00 0.00 0.00 0.14 0.16 -0.13 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 -0.01 0.00 -0.17 0.15 -0.06 -0.02 -0.09
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.02 0.00 0.05 0.29 2.37 -0.03 0.00 0.08
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.35 -0.04 -0.04 0.24 0.00 0.00 0.07
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.14 -0.03 0.01 -0.10 -0.06 0.00 0.30 0.09
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## -0.19 0.00 -0.17 0.07 0.09 -0.14 0.26 0.03
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.02 0.00 0.11 -0.01 0.00 0.22 -0.14 0.45
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## -0.09 0.00 5.65 -0.15 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.18 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.10 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 -0.04 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 -0.01 0.00 0.00 0.05 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.44 0.00 -0.26
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## -0.04 -1.55 0.00 0.04 0.00 0.00 -0.01 -0.03
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## -0.04 0.00 0.00 0.00 -0.02 0.01 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 -0.02 0.00 0.00 0.01 -0.05 0.03 -0.03
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.03 0.00 0.00 0.00 -0.09 -0.02 0.00 0.07
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 -0.09 0.00 0.00 0.00 -0.02 0.00 0.01
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## -0.01 -0.03 -0.02 0.00 0.00 0.00 0.01 -0.01
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.05 0.00 -0.01 0.07 -0.07 -0.04 0.00 0.00
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## -0.01 0.00 0.01 0.00 0.00 -0.06 -0.07 0.03
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.11 0.00 -1.05 0.00 0.01 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 -0.05 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## -0.22 -0.04 -0.06 0.04 0.02 0.00 -0.25 0.03
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 0.00 -0.09 -0.16 -0.02 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.01 -0.04 -0.03 -0.02 -0.02 0.01 0.07 -0.02
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.10 0.00 0.08 -0.09 0.13 -0.06 0.00 0.23
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## -0.01 -0.34 0.00 -0.04 -0.05 -0.05 0.00 -0.05
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.10 -0.01 -0.03 -0.04 -0.12 -0.01 0.14 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## -0.06 -0.01 -0.04 0.23 -0.19 -0.02 -0.20 -0.05
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.01 -0.02 0.00 0.00 -0.01 -0.19 -0.32 0.09
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 0.00 0.01 0.01 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.05 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 -0.01 0.01
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 -0.27 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o
## 1.43 1.43 -1.47 -2.93 0.68 1.08
# Score the held-out test set with the node-1 TDA/KDE neural network
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels (adult_df1 coerced to
# a factor), store the confusion matrix, and echo it
ad_tda_kde_5.50.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7373 1914
## >50K 43 438
##
## Accuracy : 0.7997
## 95% CI : (0.7916, 0.8076)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2477
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9942
## Specificity : 0.1862
## Pos Pred Value : 0.7939
## Neg Pred Value : 0.9106
## Prevalence : 0.7592
## Detection Rate : 0.7548
## Detection Prevalence : 0.9508
## Balanced Accuracy : 0.5902
##
## 'Positive' Class : <=50K
##
# Echo the same confusion-matrix object a second time (the recorded output is
# identical to the print directly above).
ad_tda_kde_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7373 1914
## >50K 43 438
##
## Accuracy : 0.7997
## 95% CI : (0.7916, 0.8076)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2477
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9942
## Specificity : 0.1862
## Pos Pred Value : 0.7939
## Neg Pred Value : 0.9106
## Prevalence : 0.7592
## Detection Rate : 0.7548
## Detection Prevalence : 0.9508
## Balanced Accuracy : 0.5902
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the confusion matrix
ad_tda_kde_5.50.5_n1_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.996519e-01 2.477023e-01 7.915743e-01 8.075505e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.963235e-22 0.000000e+00
# Keep the overall accuracy (first entry of $overall) as a named scalar
ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_nn1_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.50.5_n1_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9942017 0.1862245 0.7939055
## Neg Pred Value Precision Recall
## 0.9106029 0.7939055 0.9942017
## F1 Prevalence Detection Rate
## 0.8828354 0.7592138 0.7548116
## Detection Prevalence Balanced Accuracy
## 0.9507576 0.5902131
# Keep precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.50.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN1 vs. tda-assisted NN1 classifiers
### 3-fold difference: ad_nn1_fit_re minus the per-fold accuracies of the
### TDA-KDE node-1 NN1 model (one-column data frames)
diff_tda_kde_5.50.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n1_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n1_3_fold
## Accuracy
## 1 -0.006016296
## 2 -0.027286713
## 3 -0.001316844
## Bayesian tests on the 3-fold accuracy differences
# Bayesian sign test; -0.01 and 0.01 are the 2nd and 3rd positional arguments
# (presumably the ROPE bounds -- the helper is defined outside this section).
bst_tda_kde_5.50.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n1_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right" (Inf when probRight is 0)
bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n1_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same two bounds
bsr_tda_kde_5.50.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n1_3_fold
## $winLeft
## [1] 0.4184
##
## $winRope
## [1] 0.5816
##
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is the 2nd positional argument (presumably
# the correlation -- confirm against the helper's definition)
bct_tda_kde_5.50.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n1_3_fold
## $left
## [1] 0.5586103
##
## $rope
## [1] 0.3690579
##
## $right
## [1] 0.07233177
# ROPE plot: rope() is evaluated on the 3-fold accuracy differences with the
# range c(-0.01, 0.01) and its result is plotted.
plot(
  rope(diff_tda_kde_5.50.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (left disabled)
#bf_tda_kde_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
#bf_tda_kde_5.50.5_nn1.n1_3_fold
# One-sample t-test of the fold-wise differences (recorded output tests
# against a true mean of 0)
t.test(x = as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold)
## t = -1.4444, df = 2, p-value = 0.2855
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.04591557 0.02283567
## sample estimates:
## mean of x
## -0.01153995
### Test set diff
# Difference in overall test-set accuracy: nn1_cf_ov_acc minus the TDA-KDE
# node-1 NN1 accuracy. This must use the NN1 reference accuracy, as every
# other NN1 test-set difference in this analysis does; subtracting from
# svm_cf_ov_acc would compare the TDA-assisted network against the SVM.
# NOTE(review): the recorded value right below (0.0513923) and the recorded
# Bayesian test results that follow were produced with the SVM accuracy and
# need to be regenerated by re-running the document.
diff_tda_kde_5.50.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n1_test
## Accuracy
## 0.0513923
## Bayesian tests on the test-set accuracy difference
# NOTE(review): the input is a single difference value, so every test below
# runs on one observation.
# Bayesian sign test; -0.01 and 0.01 are the 2nd and 3rd positional arguments
# (presumably the ROPE bounds -- the helper is defined outside this section).
bst_tda_kde_5.50.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds of "left" versus "right"
bst_tda_kde_5.50.5_nn1.n1_test_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n1_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test with the same two bounds
bsr_tda_kde_5.50.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1605333
##
## $winRight
## [1] 0.8394667
# Bayesian correlated t-test; 0.1 is the 2nd positional argument (presumably
# the correlation -- confirm against the helper's definition)
bct_tda_kde_5.50.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_nn1.n1_test))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n1_test))
#bf_tda_kde_5.50.5_nn1.n1_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test))
## Node 2
# Neural network 1: caret train() with method "nnet", resampled via
# fitControl and tuned on accuracy.
# Fix: this call previously trained on tda.m_kde_adult_5.50.5.n3.vec, the
# same object the Node 3 section uses, so the "Node 2" model was a second
# fit of the Node 3 data. It now uses the Node 2 data set.
# NOTE(review): the n2 object name follows the n3/n4 naming pattern --
# confirm it exists. Console output recorded below this call came from the
# earlier run on the n3 data and must be regenerated.
Adult_TDA_KDE_5.50.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 5930.683450
## iter 10 value 4245.284346
## final value 4245.201290
## converged
## # weights: 331
## initial value 6177.305703
## iter 10 value 4558.071222
## final value 4558.069553
## converged
## # weights: 551
## initial value 6763.207906
## final value 4536.086163
## converged
## # weights: 111
## initial value 5432.666658
## iter 10 value 4569.717617
## final value 4569.714463
## converged
## # weights: 331
## initial value 6147.890732
## iter 10 value 4536.136799
## iter 20 value 4230.299486
## iter 30 value 3723.255967
## iter 40 value 3040.259416
## iter 50 value 2876.935687
## iter 60 value 2755.809998
## iter 70 value 2667.253969
## iter 80 value 2626.619230
## iter 90 value 2605.630950
## iter 100 value 2596.702578
## final value 2596.702578
## stopped after 100 iterations
## # weights: 551
## initial value 13190.555196
## iter 10 value 4540.636995
## iter 20 value 4280.772679
## iter 30 value 4215.750301
## iter 40 value 4183.082260
## iter 50 value 4174.225978
## iter 60 value 4172.830365
## iter 70 value 4149.006510
## iter 80 value 4094.267350
## iter 90 value 4038.300268
## iter 100 value 3611.170139
## final value 3611.170139
## stopped after 100 iterations
## # weights: 111
## initial value 6189.887267
## iter 10 value 4561.954327
## final value 4561.950369
## converged
## # weights: 331
## initial value 4571.984179
## final value 4569.673648
## converged
## # weights: 551
## initial value 7086.593553
## iter 10 value 4560.672329
## final value 4560.668546
## converged
## # weights: 111
## initial value 6368.448984
## final value 4569.667984
## converged
## # weights: 331
## initial value 6869.756319
## iter 10 value 4552.907759
## final value 4552.905853
## converged
## # weights: 551
## initial value 4581.189338
## iter 10 value 4563.231076
## final value 4563.227810
## converged
## # weights: 111
## initial value 9105.335753
## iter 10 value 4569.760549
## iter 20 value 4569.714993
## final value 4569.714517
## converged
## # weights: 331
## initial value 5016.469773
## iter 10 value 4286.211602
## iter 20 value 4232.321762
## iter 30 value 4192.595077
## iter 40 value 4133.398231
## iter 50 value 4119.000614
## iter 60 value 3995.410127
## iter 70 value 3587.180721
## iter 80 value 3128.014790
## iter 90 value 3058.844529
## iter 100 value 2954.094340
## final value 2954.094340
## stopped after 100 iterations
## # weights: 551
## initial value 4672.344253
## iter 10 value 4569.950375
## iter 20 value 4569.701868
## final value 4569.699022
## converged
## # weights: 111
## initial value 6983.164070
## final value 4569.670079
## converged
## # weights: 331
## initial value 8836.452966
## iter 10 value 4500.327090
## final value 4500.267674
## converged
## # weights: 551
## initial value 5061.019809
## final value 4530.966531
## converged
## # weights: 111
## initial value 7247.744150
## iter 10 value 4567.424743
## final value 4567.415698
## converged
## # weights: 331
## initial value 6033.259818
## iter 10 value 4545.477051
## final value 4545.470615
## converged
## # weights: 551
## initial value 5243.738886
## final value 4545.476644
## converged
## # weights: 111
## initial value 5493.113260
## iter 10 value 4568.752349
## final value 4568.750038
## converged
## # weights: 331
## initial value 9145.208968
## iter 10 value 4568.764395
## iter 20 value 4568.734837
## final value 4568.734565
## converged
## # weights: 551
## initial value 4797.094928
## iter 10 value 4567.944042
## iter 20 value 4551.149530
## iter 30 value 4229.303227
## iter 40 value 4187.004430
## iter 50 value 4175.337976
## iter 60 value 4159.809359
## iter 70 value 4156.397556
## iter 80 value 4153.754334
## iter 90 value 4089.733266
## iter 100 value 3831.012797
## final value 3831.012797
## stopped after 100 iterations
## # weights: 111
## initial value 8212.545037
## final value 4568.705829
## converged
## # weights: 331
## initial value 5869.733883
## iter 10 value 4567.439861
## final value 4567.430222
## converged
## # weights: 551
## initial value 4704.895635
## iter 10 value 4368.942914
## final value 4368.932833
## converged
## # weights: 331
## initial value 6962.811008
## iter 10 value 6718.625618
## iter 20 value 6693.873473
## iter 30 value 6520.663645
## iter 40 value 6390.530236
## iter 50 value 6306.136667
## iter 60 value 6270.485697
## iter 70 value 6129.785849
## iter 80 value 5901.303754
## iter 90 value 5184.129302
## iter 100 value 5010.874846
## final value 5010.874846
## stopped after 100 iterations
# Print the caret training summary (tuning grid and selected model) for Node 2
Adult_TDA_KDE_5.50.5_n2_NN1Fit0
## Neural Network
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7756, 7756, 7756
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.7359464 0.069090191
## 1 1e-04 0.7243424 0.001801554
## 1 1e-01 0.7239986 0.000000000
## 3 0e+00 0.7258037 0.009441320
## 3 1e-04 0.7260615 0.010987016
## 3 1e-01 0.7835654 0.367469242
## 5 0e+00 0.7265773 0.013448250
## 5 1e-04 0.7337975 0.051698650
## 5 1e-01 0.7539969 0.172924769
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Per-fold resampling metrics of the selected Node 2 model
Adult_TDA_KDE_5.50.5_n2_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8269727 0.5552632 Fold1
## 2 0.7998969 0.5471445 Fold2
## 3 0.7238267 0.0000000 Fold3
# Keep the per-fold accuracy (first column of the resample table) as a
# one-column data frame for the 3-fold comparison
ad_tda_kde_5.50.5_n2_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n2_NN1Fit0$resample["Accuracy"]
# Print the fitted network's architecture and weights
summary(Adult_TDA_KDE_5.50.5_n2_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.18 0.18 -2.47 3.77 -0.35 -0.03 0.84 3.49
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -4.36 -0.74 0.03 0.00 0.00 -0.28 -5.34 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 7.40 4.03 12.79 0.00 -1.94
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## -18.76 0.00 0.00 2.28 -2.62 -0.94 0.59 4.24
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.63 -0.69 -1.45 -0.94 -2.50 0.13 -0.18 0.53
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 3.84 -8.50 -2.26 0.80 -3.95 0.10 1.96 5.20
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 2.11 2.03 0.86 1.16 -0.81 -0.53 -2.17 -1.77
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 4.30 -2.39 2.41 1.50 -2.63 1.29 0.39 -0.21
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.18 -1.80 0.86 1.17 -0.20 -0.11
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.16 -0.25 -0.70 0.07 1.80 0.61 2.11 -1.28
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## -0.44 -0.08 0.00 0.08 -0.09 -0.13 -0.34 0.43
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.45 -0.60 0.45 0.19 -0.10 -1.28 0.16 0.04
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.16 2.85 -0.01 -0.27 -0.63 -0.02 0.53 -0.90
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## -0.12 -0.03 -0.66 -1.68 0.08
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.64 1.70 0.08 0.26 -0.40 0.00 0.17 0.07
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.65 -0.26 0.07 0.00 0.00 0.00 0.02 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 -0.11 -0.01 2.13 0.00 -0.43
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -1.57 0.00 0.00 0.60 6.63 -1.60 0.00 2.80
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 -0.10 0.27 -0.74 0.08 -0.58 0.00 0.46
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.79 0.48 0.07 0.01 0.13 0.00 0.92 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -0.34 0.19 0.01 1.57 -0.88 -0.27 0.23 -1.24
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 1.23 0.01 0.00 -0.19 0.03 0.79 0.85 -0.21
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## -0.10 0.00 6.29 -0.17 0.00 0.04 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.07 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.81 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o
## 0.96 2.60 -4.09 0.95
# Class predictions of the Node 2 network for the test data
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.50.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6131 967
## >50K 1285 1385
##
## Accuracy : 0.7695
## 95% CI : (0.761, 0.7778)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.009036
##
## Kappa : 0.3972
##
## Mcnemar's Test P-Value : 2.39e-11
##
## Sensitivity : 0.8267
## Specificity : 0.5889
## Pos Pred Value : 0.8638
## Neg Pred Value : 0.5187
## Prevalence : 0.7592
## Detection Rate : 0.6277
## Detection Prevalence : 0.7267
## Balanced Accuracy : 0.7078
##
## 'Positive' Class : <=50K
##
# Prints the Node 2 confusion matrix a second time (same object as printed above)
ad_tda_kde_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6131 967
## >50K 1285 1385
##
## Accuracy : 0.7695
## 95% CI : (0.761, 0.7778)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.009036
##
## Kappa : 0.3972
##
## Mcnemar's Test P-Value : 2.39e-11
##
## Sensitivity : 0.8267
## Specificity : 0.5889
## Pos Pred Value : 0.8638
## Neg Pred Value : 0.5187
## Prevalence : 0.7592
## Detection Rate : 0.6277
## Detection Prevalence : 0.7267
## Balanced Accuracy : 0.7078
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node 2 confusion matrix
ad_tda_kde_5.50.5_n2_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.694513e-01 3.972488e-01 7.609679e-01 7.777737e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.035912e-03 2.389772e-11
# Store the test-set accuracy (first element of the overall vector)
ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the Node 2 confusion matrix
ad_tda_kde_5.50.5_n2_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8267260 0.5888605 0.8637644
## Neg Pred Value Precision Recall
## 0.5187266 0.8637644 0.8267260
## F1 Prevalence Detection Rate
## 0.8448395 0.7592138 0.6276618
## Detection Prevalence Balanced Accuracy
## 0.7266585 0.7077933
# Precision, recall and F1 (elements 5 to 7 of byClass)
ad_tda_kde_5.50.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests of non-TDA-assisted vs. TDA-assisted neural-network classifiers
### 3-fold diff: per-fold accuracy in ad_nn1_fit_re minus the Node 2 model's
# NOTE(review): rows are matched by fold number; the train() runs in this
# file report different fold sizes for the same 11634 samples, so confirm
# that the folds are actually shared between the two models being compared.
diff_tda_kde_5.50.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n2_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n2_3_fold
## Accuracy
## 1 -0.034422976
## 2 0.009921519
## 3 0.072435459
## Bayesian tests on the Node 2 3-fold differences
# Bayesian sign test with bounds -0.01 and 0.01
bst_tda_kde_5.50.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n2_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the sign test: probLeft / probRight
bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n2_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left
## [1] 1
# Bayesian signed-rank test with bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n2_3_fold
## $winLeft
## [1] 0.3036333
##
## $winRope
## [1] 0.2013333
##
## $winRight
## [1] 0.4950333
# Correlated Bayesian t-test on the Node 2 3-fold accuracy differences
# (arguments after the data: 0.1, then the bounds -0.01 and 0.01).
# NOTE(review): if the second argument is the between-fold correlation rho
# (commonly 1 / number of folds), 0.1 matches 10-fold CV, not 3-fold -- confirm.
bct_tda_kde_5.50.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n2_3_fold
## $left
## [1] 0.2716939
##
## $rope
## [1] 0.1696608
##
## $right
## [1] 0.5586452
# ROPE plot of the Node 2 3-fold differences for the range -0.01 to 0.01
plot(
  rope(diff_tda_kde_5.50.5_nn1_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
#bf_tda_kde_5.50.5_nn1.n2_3_fold
# One-sample t-test of the fold differences against a mean of 0
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold)
## t = 0.51549, df = 2, p-value = 0.6575
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1173855 0.1493415
## sample estimates:
## mean of x
## 0.015978
### Test-set accuracy difference, Node 2
# NOTE(review): the baseline subtracted from is svm_cf_ov_acc, while the
# 3-fold comparison for this node uses ad_nn1_fit_re -- confirm the SVM
# accuracy is intended and not a copy-paste leftover.
diff_tda_kde_5.50.5_nn1.n2_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n2_test
## Accuracy
## 0.08159296
## Bayesian tests on the Node 2 test-set difference
# Bayesian sign test with bounds -0.01 and 0.01
bst_tda_kde_5.50.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight
bst_tda_kde_5.50.5_nn1.n2_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n2_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n2_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test with bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1581333
##
## $winRight
## [1] 0.8418667
# Correlated Bayesian t-test on the Node 2 test-set difference
# NOTE(review): the input is a single accuracy difference and the recorded
# result is NA for left, rope and right -- presumably the test needs more
# than one observation; confirm whether this call should be kept.
bct_tda_kde_5.50.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_nn1.n2_test))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n2_test))
#bf_tda_kde_5.50.5_nn1.n2_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test))
## Node 3
# Neural network 1: caret train() with method 'nnet' on the n3 data set,
# resampled via fitControl and tuned on accuracy.
Adult_TDA_KDE_5.50.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = 'nnet',
  trControl = fitControl,
  metric = 'Accuracy'
)
## # weights: 111
## initial value 5111.768353
## final value 4569.990984
## converged
## # weights: 331
## initial value 6113.230122
## iter 10 value 4567.422226
## final value 4567.415688
## converged
## # weights: 551
## initial value 5390.153672
## final value 4569.990984
## converged
## # weights: 111
## initial value 5858.802256
## iter 10 value 4569.862744
## iter 20 value 4569.493569
## iter 20 value 4569.493565
## iter 20 value 4569.493545
## final value 4569.493545
## converged
## # weights: 331
## initial value 6291.630396
## iter 10 value 4285.824677
## iter 20 value 4264.917247
## iter 30 value 4261.403305
## iter 40 value 4192.978770
## iter 50 value 4124.465058
## iter 60 value 4011.515063
## iter 70 value 3721.076385
## iter 80 value 3176.785477
## iter 90 value 2978.231608
## iter 100 value 2854.006342
## final value 2854.006342
## stopped after 100 iterations
## # weights: 551
## initial value 4743.371175
## iter 10 value 4535.542791
## iter 20 value 4278.385005
## iter 30 value 4256.005933
## iter 40 value 4252.347978
## iter 50 value 4247.616447
## iter 60 value 4157.823899
## iter 70 value 4100.851549
## iter 80 value 4080.399334
## iter 90 value 4073.240521
## iter 100 value 4062.148586
## final value 4062.148586
## stopped after 100 iterations
## # weights: 111
## initial value 5870.725150
## iter 10 value 4504.513623
## final value 4504.512577
## converged
## # weights: 331
## initial value 5056.378901
## final value 4544.198727
## converged
## # weights: 551
## initial value 4827.363092
## iter 10 value 4264.968614
## iter 20 value 4251.609850
## iter 30 value 4226.223515
## iter 40 value 4197.273919
## iter 50 value 4126.666326
## iter 60 value 4090.425997
## iter 70 value 4086.823980
## iter 80 value 4086.779035
## iter 90 value 4086.762395
## iter 100 value 4086.734092
## final value 4086.734092
## stopped after 100 iterations
## # weights: 111
## initial value 5057.833550
## final value 4521.809004
## converged
## # weights: 331
## initial value 4647.455245
## final value 4568.380621
## converged
## # weights: 551
## initial value 5119.972994
## iter 10 value 4567.105034
## final value 4567.092943
## converged
## # weights: 111
## initial value 5825.356477
## iter 10 value 4567.868211
## iter 20 value 4327.892843
## iter 30 value 4326.095332
## iter 40 value 4275.518987
## iter 50 value 4275.513270
## iter 60 value 4275.483372
## iter 70 value 4260.705445
## iter 80 value 4249.317515
## iter 90 value 4249.311471
## final value 4249.311070
## converged
## # weights: 331
## initial value 6170.957433
## iter 10 value 4529.055743
## iter 20 value 4297.748446
## iter 30 value 4296.307591
## iter 40 value 4296.279113
## iter 50 value 4295.901059
## iter 60 value 4295.753028
## iter 70 value 4260.181726
## iter 80 value 4259.593290
## final value 4259.515332
## converged
## # weights: 551
## initial value 5390.350278
## iter 10 value 4552.917903
## iter 20 value 4550.842158
## iter 30 value 4335.217152
## iter 40 value 4261.673945
## iter 50 value 4136.906258
## iter 60 value 3475.642467
## iter 70 value 3258.259101
## iter 80 value 3218.795898
## iter 90 value 3201.442504
## iter 100 value 2993.550390
## final value 2993.550390
## stopped after 100 iterations
## # weights: 111
## initial value 6949.952341
## iter 10 value 4556.803008
## final value 4556.798192
## converged
## # weights: 331
## initial value 5069.853912
## final value 4568.389443
## converged
## # weights: 551
## initial value 7425.933036
## final value 4551.635124
## converged
## # weights: 111
## initial value 5433.338943
## final value 4569.667984
## converged
## # weights: 331
## initial value 4618.390858
## final value 4569.667984
## converged
## # weights: 551
## initial value 6640.766715
## final value 4527.005257
## converged
## # weights: 111
## initial value 4854.557834
## iter 10 value 4569.716558
## final value 4569.714471
## converged
## # weights: 331
## initial value 5574.629260
## iter 10 value 4305.357985
## iter 20 value 4271.507367
## iter 30 value 4246.195947
## iter 40 value 4199.713445
## iter 50 value 4070.178676
## iter 60 value 3969.948565
## iter 70 value 3904.249891
## iter 80 value 3887.178709
## iter 90 value 3869.052215
## iter 100 value 3853.483271
## final value 3853.483271
## stopped after 100 iterations
## # weights: 551
## initial value 5103.624486
## iter 10 value 4362.400626
## iter 20 value 4247.699437
## iter 30 value 4244.516902
## iter 40 value 4239.470383
## iter 50 value 4235.615855
## iter 60 value 4230.427499
## iter 70 value 4227.536574
## iter 80 value 4192.811040
## iter 90 value 4146.522688
## iter 100 value 4078.415094
## final value 4078.415094
## stopped after 100 iterations
## # weights: 111
## initial value 4607.420878
## final value 4569.669911
## converged
## # weights: 331
## initial value 7367.058986
## final value 4540.006561
## converged
## # weights: 551
## initial value 4576.125759
## final value 4555.511492
## converged
## # weights: 331
## initial value 6861.407035
## iter 10 value 6854.061122
## iter 20 value 6854.050875
## iter 20 value 6854.050852
## iter 20 value 6854.050850
## final value 6854.050850
## converged
# Print the caret training summary (tuning grid and selected model) for Node 3
Adult_TDA_KDE_5.50.5_n3_NN1Fit0
## Neural Network
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7757, 7755, 7756
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.7253736 0.007432493
## 1 1e-04 0.7266638 0.014618712
## 1 1e-01 0.7362011 0.066919109
## 3 0e+00 0.7239986 0.000000000
## 3 1e-04 0.7259759 0.010321338
## 3 1e-01 0.7924245 0.373825954
## 5 0e+00 0.7251160 0.005833098
## 5 1e-04 0.7398181 0.078811780
## 5 1e-01 0.7862277 0.369429172
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Per-fold resampling metrics of the selected Node 3 model
Adult_TDA_KDE_5.50.5_n3_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8279598 0.5891119 Fold1
## 2 0.7615365 0.2138898 Fold2
## 3 0.7877772 0.3184761 Fold3
# Keep the per-fold accuracy (first column of the resample table) as a
# one-column data frame for the 3-fold comparison
ad_tda_kde_5.50.5_n3_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n3_NN1Fit0$resample["Accuracy"]
# Print the fitted network's architecture and weights
summary(Adult_TDA_KDE_5.50.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o
## -0.32 -0.32 0.00 -0.32
# Class predictions of the Node 3 network for the test data
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the observed test labels.
# In the recorded run every test row is predicted as ' <=50K'.
ad_tda_kde_5.50.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Prints the Node 3 confusion matrix a second time (same object as printed above)
ad_tda_kde_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node 3 confusion matrix
ad_tda_kde_5.50.5_n3_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Store the test-set accuracy (first element of the overall vector)
ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the Node 3 confusion matrix
ad_tda_kde_5.50.5_n3_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, recall and F1 (elements 5 to 7 of byClass)
ad_tda_kde_5.50.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests of non-TDA-assisted vs. TDA-assisted neural-network classifiers
### 3-fold diff: per-fold accuracy in ad_nn1_fit_re minus the Node 3 model's
# NOTE(review): rows are matched by fold number; the train() runs in this
# file report different fold sizes for the same 11634 samples, so confirm
# that the folds are actually shared between the two models being compared.
diff_tda_kde_5.50.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n3_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n3_3_fold
## Accuracy
## 1 -0.03541007
## 2 0.04828189
## 3 0.00848497
## Bayesian tests on the Node 3 3-fold differences
# Bayesian sign test with bounds -0.01 and 0.01
bst_tda_kde_5.50.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the sign test: probLeft / probRight
bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n3_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left
## [1] 1
# Bayesian signed-rank test with bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0.3455333
##
## $winRope
## [1] 0.3024
##
## $winRight
## [1] 0.3520667
# Correlated Bayesian t-test on the Node 3 3-fold accuracy differences
# (arguments after the data: 0.1, then the bounds -0.01 and 0.01).
# NOTE(review): if the second argument is the between-fold correlation rho
# (commonly 1 / number of folds), 0.1 matches 10-fold CV, not 3-fold -- confirm.
bct_tda_kde_5.50.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.3010407
##
## $rope
## [1] 0.2353607
##
## $right
## [1] 0.4635985
# ROPE plot of the Node 3 3-fold differences for the range -0.01 to 0.01
plot(
  rope(diff_tda_kde_5.50.5_nn1_n3_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
#bf_tda_kde_5.50.5_nn1.n3_3_fold
# One-sample t-test of the fold differences against a mean of 0
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold)
## t = 0.29454, df = 2, p-value = 0.7961
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.09687379 0.11111165
## sample estimates:
## mean of x
## 0.007118931
### Test-set accuracy difference, Node 3
# NOTE(review): the baseline subtracted from is svm_cf_ov_acc, while the
# 3-fold comparison for this node uses ad_nn1_fit_re -- confirm the SVM
# accuracy is intended and not a copy-paste leftover.
diff_tda_kde_5.50.5_nn1.n3_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n3_test
## Accuracy
## 0.09183047
## Bayesian tests on the Node 3 test-set difference
# Bayesian sign test with bounds -0.01 and 0.01
bst_tda_kde_5.50.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight
bst_tda_kde_5.50.5_nn1.n3_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n3_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n3_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test with bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1610333
##
## $winRight
## [1] 0.8389667
# Correlated Bayesian t-test on the Node 3 test-set difference
# NOTE(review): the input is a single accuracy difference and the recorded
# result is NA for left, rope and right -- presumably the test needs more
# than one observation; confirm whether this call should be kept.
bct_tda_kde_5.50.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_nn1.n3_test))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n3_test))
#bf_tda_kde_5.50.5_nn1.n3_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test))
##Node4
# Neural Network 1: train() tunes an nnet classifier for adult_df1 on the
# node-4 data set and picks the tuning parameters with the highest resampled
# accuracy. fitControl is defined earlier in the file.
# NOTE(review): the training log below reports several fits as
# "stopped after 100 iterations"; if convergence matters, consider passing a
# larger maxit through train() -- confirm before changing.
Adult_TDA_KDE_5.50.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 5066.467343
## iter 10 value 3207.383817
## final value 3207.381382
## converged
## # weights: 331
## initial value 4589.421276
## iter 10 value 3422.946660
## final value 3422.939877
## converged
## # weights: 551
## initial value 4604.489331
## iter 10 value 3422.965000
## final value 3422.939875
## converged
## # weights: 111
## initial value 4140.704516
## iter 10 value 3430.949158
## iter 20 value 3426.041741
## iter 30 value 3396.476475
## iter 40 value 3200.964362
## iter 50 value 3186.212937
## iter 60 value 3181.558674
## iter 70 value 3173.016967
## iter 80 value 3172.808814
## iter 90 value 3169.732589
## iter 100 value 3168.308596
## final value 3168.308596
## stopped after 100 iterations
## # weights: 331
## initial value 9119.712345
## iter 10 value 3430.820735
## final value 3430.816255
## converged
## # weights: 551
## initial value 4762.434901
## iter 10 value 3408.502221
## iter 20 value 3399.148089
## iter 30 value 3199.151884
## iter 40 value 3183.265668
## iter 50 value 3162.873674
## iter 60 value 3158.124753
## iter 70 value 3154.837865
## iter 80 value 3150.864466
## iter 90 value 3147.284386
## iter 100 value 3090.083991
## final value 3090.083991
## stopped after 100 iterations
## # weights: 111
## initial value 5110.863447
## final value 3430.773967
## converged
## # weights: 331
## initial value 6727.564802
## final value 3430.777753
## converged
## # weights: 551
## initial value 3618.793346
## final value 3405.685457
## converged
## # weights: 111
## initial value 5063.069780
## final value 3432.571375
## converged
## # weights: 331
## initial value 5940.993863
## iter 10 value 3431.024651
## final value 3431.006520
## converged
## # weights: 551
## initial value 5062.121491
## final value 3432.571375
## converged
## # weights: 111
## initial value 5451.668315
## iter 10 value 3432.749492
## iter 20 value 3338.812356
## iter 30 value 3224.438636
## iter 40 value 3224.231135
## iter 50 value 3224.119819
## iter 60 value 3223.737278
## iter 70 value 3208.759746
## iter 80 value 3172.877134
## iter 90 value 3158.238246
## iter 100 value 3122.697750
## final value 3122.697750
## stopped after 100 iterations
## # weights: 331
## initial value 4731.073515
## iter 10 value 3432.668497
## iter 20 value 3432.543345
## iter 30 value 3235.898568
## iter 40 value 3194.175044
## iter 50 value 3182.872083
## iter 60 value 3177.143853
## iter 70 value 3173.572455
## iter 80 value 3173.106229
## iter 90 value 3169.662369
## iter 100 value 3134.153695
## final value 3134.153695
## stopped after 100 iterations
## # weights: 551
## initial value 7808.738296
## iter 10 value 3412.016828
## iter 20 value 3224.941077
## iter 30 value 3146.632095
## iter 40 value 3130.870233
## iter 50 value 3130.351973
## iter 60 value 3100.803910
## iter 70 value 3061.853786
## iter 80 value 3054.181436
## iter 90 value 3015.615356
## iter 100 value 2807.827133
## final value 2807.827133
## stopped after 100 iterations
## # weights: 111
## initial value 4043.381651
## final value 3409.054784
## converged
## # weights: 331
## initial value 5343.940186
## final value 3432.576561
## converged
## # weights: 551
## initial value 4851.304525
## final value 3432.580514
## converged
## # weights: 111
## initial value 3707.648048
## final value 3431.006503
## converged
## # weights: 331
## initial value 4888.830458
## final value 3431.006503
## converged
## # weights: 551
## initial value 3814.909474
## final value 3431.006503
## converged
## # weights: 111
## initial value 3742.709716
## iter 10 value 3412.809404
## iter 20 value 3216.662395
## iter 30 value 3152.064770
## iter 40 value 3138.103881
## iter 50 value 3115.815351
## iter 60 value 3101.867654
## iter 70 value 2823.325356
## iter 80 value 2459.339549
## iter 90 value 2355.449337
## iter 100 value 2328.855864
## final value 2328.855864
## stopped after 100 iterations
## # weights: 331
## initial value 4322.143356
## iter 10 value 3431.051089
## final value 3431.050778
## converged
## # weights: 551
## initial value 4279.488960
## iter 10 value 3410.174590
## iter 20 value 3257.181604
## iter 30 value 3220.719863
## iter 40 value 3183.367976
## iter 50 value 3165.261010
## iter 60 value 3151.060531
## iter 70 value 3141.396913
## iter 80 value 3116.940247
## iter 90 value 2914.767809
## iter 100 value 2583.535935
## final value 2583.535935
## stopped after 100 iterations
## # weights: 111
## initial value 5797.194391
## final value 3431.008492
## converged
## # weights: 331
## initial value 4600.694454
## final value 3399.615250
## converged
## # weights: 551
## initial value 5714.728090
## final value 3431.015688
## converged
## # weights: 111
## initial value 6557.307537
## iter 10 value 5147.351844
## iter 20 value 4802.787929
## iter 30 value 4792.101791
## iter 40 value 4790.270988
## final value 4790.225871
## converged
# Print the tuning summary: resampled accuracy/kappa for each size/decay pair
Adult_TDA_KDE_5.50.5_n4_NN1Fit0
## Neural Network
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6691, 6693, 6692
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.7953762 0.0614239378
## 1 1e-04 0.7915922 0.0052504822
## 1 1e-01 0.8285525 0.3076400422
## 3 0e+00 0.7910939 0.0015063855
## 3 1e-04 0.7923889 0.0116859107
## 3 1e-01 0.8009594 0.0928249678
## 5 0e+00 0.7909942 0.0007525722
## 5 1e-04 0.7914922 0.0044967642
## 5 1e-01 0.8255648 0.3089940717
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 1 and decay = 0.1.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8117717 0.1828468 Fold1
## 2 0.8215247 0.2211546 Fold2
## 3 0.8523610 0.5189187 Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name; Accuracy is the first column of $resample as printed above.
ad_tda_kde_5.50.5_n4_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample["Accuracy"]
# Weights of the final fitted network
summary(Adult_TDA_KDE_5.50.5_n4_NN1Fit0)
## a 108-1-1 network with 111 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -0.01 -0.14 0.00 0.00 0.00 0.00 -0.01 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.02 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.05 0.00 -0.02
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 -0.04 0.04 0.07 0.00 -0.08
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 -0.02
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.05 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -0.04 0.00 0.00 -0.08 0.07 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 -0.04 0.00 0.03 0.00 -0.01
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.62 -0.43 0.17 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 -0.01 0.00 0.00
## b->o h1->o
## 1.57 -3.06
# Score the test data with the tuned node-4 network
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed adult_df1 labels
ad_tda_kde_5.50.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7337 1915
## >50K 79 437
##
## Accuracy : 0.7959
## 95% CI : (0.7877, 0.8038)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2388
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9893
## Specificity : 0.1858
## Pos Pred Value : 0.7930
## Neg Pred Value : 0.8469
## Prevalence : 0.7592
## Detection Rate : 0.7511
## Detection Prevalence : 0.9472
## Balanced Accuracy : 0.5876
##
## 'Positive' Class : <=50K
##
# Prints the same confusion matrix object a second time (identical output)
ad_tda_kde_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7337 1915
## >50K 79 437
##
## Accuracy : 0.7959
## 95% CI : (0.7877, 0.8038)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2388
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9893
## Specificity : 0.1858
## Pos Pred Value : 0.7930
## Neg Pred Value : 0.8469
## Prevalence : 0.7592
## Detection Rate : 0.7511
## Detection Prevalence : 0.9472
## Balanced Accuracy : 0.5876
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the node-4 confusion matrix
ad_tda_kde_5.50.5_n4_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.958640e-01 2.387885e-01 7.877319e-01 8.038194e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 3.442541e-18 0.000000e+00
# Store the test accuracy, picked by name (it is the first entry of $overall)
ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_nn1_cf0$overall["Accuracy"]
# Per-class statistics (the positive class is the <=50K level)
ad_tda_kde_5.50.5_n4_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9893474 0.1857993 0.7930177
## Neg Pred Value Precision Recall
## 0.8468992 0.7930177 0.9893474
## F1 Prevalence Detection Rate
## 0.8803696 0.7592138 0.7511261
## Detection Prevalence Balanced Accuracy
## 0.9471744 0.5875733
# Precision, recall and F1 are entries 5 to 7 of $byClass; selected by name
ad_tda_kde_5.50.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_nn1_fit_re (presumably the nn1 fit without
# TDA, defined earlier) minus the node-4 TDA/KDE nn1 fit. Negative values mean
# the node-4 model had the higher fold accuracy.
diff_tda_kde_5.50.5_nn1_n4_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.50.5_n4_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n4_3_fold
## Accuracy
## 1 -0.01922205
## 2 -0.01170629
## 3 -0.05609885
## Bayesian Tests 3-fold diff
# Bayesian sign test (-0.01 and 0.01 are presumably the ROPE limits)
bst_tda_kde_5.50.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# probRight is 0 here, so the ratio evaluates to Inf.
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n4_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same 3-fold differences
bsr_tda_kde_5.50.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9086
##
## $winRope
## [1] 0.0914
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-4 3-fold accuracy differences.
# The positional arguments after the data are 0.1, -0.01 and 0.01
# (presumably the fold correlation and the ROPE limits -- confirm against the
# definition of correlatedBayesianTtest).
bct_tda_kde_5.50.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.8235127
##
## $rope
## [1] 0.1100822
##
## $right
## [1] 0.06640507
# ROPE plot of the node-4 3-fold differences, ROPE range [-0.01, 0.01]
plot(
  rope(
    diff_tda_kde_5.50.5_nn1_n4_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor t-test (currently disabled)
#bf_tda_kde_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold))
#bf_tda_kde_5.50.5_nn1.n4_3_fold
# Classical one-sample t-test of the fold differences against a mean of 0.
# The call is kept verbatim because its text is echoed in the "data:" line.
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold)
## t = -2.1147, df = 2, p-value = 0.1687
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.08803095 0.03001283
## sample estimates:
## mean of x
## -0.02900906
### Test set diff
# Difference between a reference test accuracy and the node-4 TDA/KDE nn1
# test accuracy.
# NOTE(review): the reference is svm_cf_ov_acc, whereas the 3-fold comparison
# for this model uses the nn1 baseline (ad_nn1_fit_re) -- confirm that the SVM
# test accuracy is the intended baseline.
diff_tda_kde_5.50.5_nn1.n4_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n4_test
## Accuracy
## 0.05518018
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set difference
# (-0.01 and 0.01 are presumably the ROPE limits)
bst_tda_kde_5.50.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight
bst_tda_kde_5.50.5_nn1.n4_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n4_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n4_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same test-set difference
bsr_tda_kde_5.50.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1585667
##
## $winRight
## [1] 0.8414333
# Bayesian correlated t-test on the test-set difference.
# The input is a single value and the echoed result is NA for left, rope and
# right (presumably because no spread can be estimated from one observation --
# confirm against the definition of correlatedBayesianTtest).
bct_tda_kde_5.50.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n4_test)))
#BayesFactor
#bf_tda_kde_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n4_test)) #bf_tda_pca_5.50.5_nn1.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n4_test))
##Node5
# Neural Network 1: train() tunes an nnet classifier for adult_df1 on the
# node-5 data set and picks the tuning parameters with the highest resampled
# accuracy. fitControl is defined earlier in the file.
# NOTE(review): the training log below reports several fits as
# "stopped after 100 iterations"; if convergence matters, consider passing a
# larger maxit through train() -- confirm before changing.
Adult_TDA_KDE_5.50.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  method = "nnet",
  trControl = fitControl,
  metric = "Accuracy"
)
## # weights: 111
## initial value 6024.442869
## iter 10 value 2140.512894
## iter 20 value 2140.490647
## iter 20 value 2140.490635
## iter 20 value 2140.490635
## final value 2140.490635
## converged
## # weights: 331
## initial value 3086.504343
## iter 10 value 2146.142505
## final value 2146.131718
## converged
## # weights: 551
## initial value 2727.185121
## iter 10 value 2151.768635
## final value 2151.762840
## converged
## # weights: 111
## initial value 3877.307464
## iter 10 value 2137.874465
## iter 20 value 1977.479059
## iter 30 value 1969.401851
## iter 40 value 1927.026389
## iter 50 value 1863.233262
## iter 60 value 1606.914263
## iter 70 value 1509.211080
## iter 80 value 1482.458248
## iter 90 value 1405.750368
## iter 100 value 1376.776999
## final value 1376.776999
## stopped after 100 iterations
## # weights: 331
## initial value 2779.128836
## iter 10 value 2157.493395
## iter 20 value 2157.286710
## iter 30 value 2099.834733
## iter 40 value 1974.343314
## iter 50 value 1966.845044
## iter 60 value 1878.786050
## iter 70 value 1667.380673
## iter 80 value 1614.252845
## iter 90 value 1526.975113
## iter 100 value 1435.572131
## final value 1435.572131
## stopped after 100 iterations
## # weights: 551
## initial value 2956.075235
## iter 10 value 2157.628334
## iter 20 value 2157.461267
## iter 30 value 2157.456882
## final value 2157.456779
## converged
## # weights: 111
## initial value 3835.842884
## final value 2157.386397
## converged
## # weights: 331
## initial value 2601.056410
## final value 2157.389529
## converged
## # weights: 551
## initial value 4972.003011
## final value 2123.545049
## converged
## # weights: 111
## initial value 4457.433858
## final value 2159.255618
## converged
## # weights: 331
## initial value 4158.814355
## final value 2159.255618
## converged
## # weights: 551
## initial value 4203.253305
## final value 2159.255618
## converged
## # weights: 111
## initial value 2636.813222
## iter 10 value 2159.546013
## iter 20 value 2060.795889
## iter 30 value 2021.380054
## iter 40 value 2020.534368
## iter 50 value 2020.030012
## iter 60 value 2020.001986
## iter 70 value 2017.782202
## iter 80 value 2017.516072
## final value 2017.515012
## converged
## # weights: 331
## initial value 3113.068073
## iter 10 value 2078.119028
## iter 20 value 2064.624735
## iter 30 value 2053.964814
## iter 40 value 2036.484947
## final value 2029.322434
## converged
## # weights: 551
## initial value 3125.919689
## iter 10 value 2159.404998
## iter 20 value 2159.352977
## iter 30 value 2159.334586
## iter 40 value 2094.929465
## iter 50 value 2093.737328
## iter 60 value 2093.728419
## iter 70 value 2042.738904
## iter 80 value 2002.689494
## iter 90 value 1977.244810
## iter 100 value 1610.743237
## final value 1610.743237
## stopped after 100 iterations
## # weights: 111
## initial value 2661.958823
## final value 2159.257846
## converged
## # weights: 331
## initial value 2926.039559
## iter 10 value 2151.781846
## final value 2151.777760
## converged
## # weights: 551
## initial value 3740.689754
## final value 2134.862943
## converged
## # weights: 111
## initial value 4091.635980
## final value 2157.551046
## converged
## # weights: 331
## initial value 2971.792100
## iter 10 value 2146.311494
## final value 2146.297500
## converged
## # weights: 551
## initial value 2916.303934
## final value 2157.551046
## converged
## # weights: 111
## initial value 2965.046366
## iter 10 value 2157.701646
## iter 20 value 2147.509693
## iter 30 value 2143.577754
## iter 40 value 2014.763529
## iter 50 value 1997.233090
## iter 60 value 1923.714143
## iter 70 value 1723.130118
## iter 80 value 1594.666841
## iter 90 value 1458.344261
## iter 100 value 1407.563585
## final value 1407.563585
## stopped after 100 iterations
## # weights: 331
## initial value 5771.291017
## iter 10 value 2156.663460
## iter 20 value 2156.638363
## iter 30 value 2036.268031
## iter 40 value 2009.479547
## iter 50 value 2008.340813
## final value 2008.335502
## converged
## # weights: 551
## initial value 2896.863885
## iter 10 value 2143.949240
## iter 20 value 1973.284550
## iter 30 value 1855.767397
## iter 40 value 1664.983166
## iter 50 value 1646.156925
## iter 60 value 1518.832759
## iter 70 value 1370.827030
## iter 80 value 1315.614761
## iter 90 value 1312.083106
## iter 100 value 1294.181766
## final value 1294.181766
## stopped after 100 iterations
## # weights: 111
## initial value 2613.025544
## final value 2157.553164
## converged
## # weights: 331
## initial value 3864.565883
## final value 2157.556945
## converged
## # weights: 551
## initial value 2576.285505
## final value 2146.312864
## converged
## # weights: 331
## initial value 5647.191897
## iter 10 value 3216.569710
## iter 20 value 3128.387791
## iter 30 value 2770.485167
## iter 40 value 2676.748932
## iter 50 value 2648.168974
## iter 60 value 2631.194645
## iter 70 value 2357.243610
## iter 80 value 2235.738191
## iter 90 value 2165.517927
## iter 100 value 2101.027110
## final value 2101.027110
## stopped after 100 iterations
# Print the tuning summary: resampled accuracy/kappa for each size/decay pair
Adult_TDA_KDE_5.50.5_n5_NN1Fit0
## Neural Network
##
## 7540 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5026, 5027, 5027
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0e+00 0.8466842 0.005788476
## 1 1e-04 0.8461539 0.000000000
## 1 1e-01 0.8591505 0.315685954
## 3 0e+00 0.8477454 0.017282973
## 3 1e-04 0.8462865 0.001459210
## 3 1e-01 0.8623348 0.219213678
## 5 0e+00 0.8462864 0.001454869
## 5 1e-04 0.8485410 0.025902999
## 5 1e-01 0.8550410 0.132691908
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_KDE_5.50.5_n5_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8579952 0.2337447 Fold1
## 2 0.8651015 0.2006675 Fold2
## 3 0.8639077 0.2232289 Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name; Accuracy is the first column of $resample as printed above.
ad_tda_kde_5.50.5_n5_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_NN1Fit0$resample["Accuracy"]
# Weights of the final fitted network
summary(Adult_TDA_KDE_5.50.5_n5_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.1
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.31 -0.09 0.31 -0.82 -0.04 0.45 -0.14 -0.22
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.33 0.13 0.30 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 -0.93 0.00 0.00 1.36
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 -0.11 0.85 1.10 -2.51 -2.03
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.99 1.37 1.38 0.00 0.76 -0.22 0.57 0.06
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.70 0.77 0.41 0.16 0.63 0.60 -1.51 -0.48
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -0.08 -1.11 0.45 -0.44 -1.33 0.75 4.17 -0.77
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -2.06 1.27 -1.57 -0.57 2.26 -1.08 0.54 -0.23
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 -0.04 -0.49 0.74 -0.51 -2.04 1.10
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -1.46 1.53 0.31 1.51 -0.27 0.03 -1.21 0.56
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## -1.11 0.08 0.00 0.13 1.01 0.16 0.74 -0.71
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## -0.82 1.33 -2.19 1.81 0.93 -0.41 1.20 0.20
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.71 -1.97 -0.80 -0.17 1.16 0.53 -0.43 0.42
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## -0.17 1.16 -0.86 0.51 -1.94
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o
## 0.17 0.00 -5.48 0.22
# Score the test data with the tuned node-5 network
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed adult_df1 labels
ad_tda_kde_5.50.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6374 1460
## >50K 1042 892
##
## Accuracy : 0.7439
## 95% CI : (0.7351, 0.7525)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9998
##
## Kappa : 0.2542
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8595
## Specificity : 0.3793
## Pos Pred Value : 0.8136
## Neg Pred Value : 0.4612
## Prevalence : 0.7592
## Detection Rate : 0.6525
## Detection Prevalence : 0.8020
## Balanced Accuracy : 0.6194
##
## 'Positive' Class : <=50K
##
# Prints the same confusion matrix object a second time (identical output)
ad_tda_kde_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6374 1460
## >50K 1042 892
##
## Accuracy : 0.7439
## 95% CI : (0.7351, 0.7525)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9998
##
## Kappa : 0.2542
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8595
## Specificity : 0.3793
## Pos Pred Value : 0.8136
## Neg Pred Value : 0.4612
## Prevalence : 0.7592
## Detection Rate : 0.6525
## Detection Prevalence : 0.8020
## Balanced Accuracy : 0.6194
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the node-5 confusion matrix
ad_tda_kde_5.50.5_n5_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.438575e-01 2.541671e-01 7.350781e-01 7.524912e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.997987e-01 7.641451e-17
# Store the test accuracy, picked by name (it is the first entry of $overall)
ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_nn1_cf0$overall["Accuracy"]
# Per-class statistics (the positive class is the <=50K level)
ad_tda_kde_5.50.5_n5_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8594930 0.3792517 0.8136329
## Neg Pred Value Precision Recall
## 0.4612203 0.8136329 0.8594930
## F1 Prevalence Detection Rate
## 0.8359344 0.7592138 0.6525389
## Detection Prevalence Balanced Accuracy
## 0.8020066 0.6193723
# Precision, recall and F1 are entries 5 to 7 of $byClass; selected by name
ad_tda_kde_5.50.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_nn1_fit_re (presumably the nn1 fit without
# TDA, defined earlier) minus the node-5 TDA/KDE nn1 fit. Negative values mean
# the node-5 model had the higher fold accuracy.
diff_tda_kde_5.50.5_nn1_n5_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.50.5_n5_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n5_3_fold
## Accuracy
## 1 -0.06544554
## 2 -0.05528310
## 3 -0.06764551
## Bayesian Tests 3-fold diff
# Bayesian sign test (-0.01 and 0.01 are presumably the ROPE limits)
bst_tda_kde_5.50.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# probRight is 0 here, so the ratio evaluates to Inf.
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n5_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same 3-fold differences
bsr_tda_kde_5.50.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9913667
##
## $winRope
## [1] 0.008633333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-5 3-fold accuracy differences.
# The positional arguments after the data are 0.1, -0.01 and 0.01
# (presumably the fold correlation and the ROPE limits -- confirm against the
# definition of correlatedBayesianTtest).
bct_tda_kde_5.50.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.9965678
##
## $rope
## [1] 0.001618088
##
## $right
## [1] 0.001814075
# ROPE plot of the node-5 3-fold differences, ROPE range [-0.01, 0.01]
plot(
  rope(
    diff_tda_kde_5.50.5_nn1_n5_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor t-test (currently disabled)
#bf_tda_kde_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold))
#bf_tda_kde_5.50.5_nn1.n5_3_fold
# Classical one-sample t-test of the fold differences against a mean of 0.
# The call is kept verbatim because its text is echoed in the "data:" line.
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold)
## t = -16.492, df = 2, p-value = 0.003657
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.07917364 -0.04640912
## sample estimates:
## mean of x
## -0.06279138
### Test set diff
# Difference between a reference test accuracy and the node-5 TDA/KDE nn1
# test accuracy.
# NOTE(review): the reference is svm_cf_ov_acc, whereas the 3-fold comparison
# for this model uses the nn1 baseline (ad_nn1_fit_re) -- confirm that the SVM
# test accuracy is the intended baseline.
diff_tda_kde_5.50.5_nn1.n5_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n5_test
## Accuracy
## 0.1071867
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set difference
# (-0.01 and 0.01 are presumably the ROPE limits)
bst_tda_kde_5.50.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight
bst_tda_kde_5.50.5_nn1.n5_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n5_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n5_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Run on the node-5 test-set difference and stored under the node-5 name.
# The previous version was a copy of the node-4 statement: it re-ran the test
# on diff_tda_kde_5.50.5_nn1.n4_test and overwrote
# bsr_tda_kde_5.50.5_nn1.n4_test, so no node-5 result was ever computed.
# NOTE: the echoed output directly below was produced by that node-4 call and
# must be regenerated by re-running this chunk.
bsr_tda_kde_5.50.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1598667
##
## $winRight
## [1] 0.8401333
# Bayesian correlated t-test on the test-set difference.
# The input is a single value and the echoed result is NA for left, rope and
# right (presumably because no spread can be estimated from one observation --
# confirm against the definition of correlatedBayesianTtest).
bct_tda_kde_5.50.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n5_test))
#BayesFactor
#bf_tda_kde_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n5_test))
#bf_tda_kde_5.50.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test))
##Logistic Regression
# Baseline logistic regression: caret's "glm" method with a binomial family,
# fitted on adult.one_hot_df4Train. Resampling is controlled by fitControl
# (defined elsewhere in the file) and accuracy is the summary metric.
adultLrFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit summary (sample count, resampling scheme, mean accuracy and kappa)
adultLrFit
## Generalized Linear Model
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15196, 15195
## Resampling results:
##
## Accuracy Kappa
## 0.8503929 0.5647736
# Per-fold accuracy and kappa from the resampling
adultLrFit$resample
## Accuracy Kappa Resample
## 1 0.8504870 0.5675901 Fold1
## 2 0.8562590 0.5793290 Fold2
## 3 0.8444327 0.5474018 Fold3
# Keep the per-fold accuracies as a one-column data frame for the later
# fold-wise comparisons, then show the coefficient table of the fitted model.
ad_lr_fit_re <- adultLrFit$resample["Accuracy"]
summary(adultLrFit)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.004e+11 3.373e+12 0.030 0.976258
## V1 2.436e-02 1.969e-03 12.371 < 2e-16 ***
## V2.. -1.004e+11 3.373e+12 -0.030 0.976258
## V2.Federal.gov -1.004e+11 3.373e+12 -0.030 0.976258
## V2.Local.gov -1.004e+11 3.373e+12 -0.030 0.976258
## V2.Never.worked -1.004e+11 3.373e+12 -0.030 0.976258
## V2.Private -1.004e+11 3.373e+12 -0.030 0.976258
## V2.Self.emp.inc -1.004e+11 3.373e+12 -0.030 0.976258
## V2.Self.emp.not.inc -1.004e+11 3.373e+12 -0.030 0.976258
## V2.State.gov -1.004e+11 3.373e+12 -0.030 0.976258
## V2.Without.pay -1.004e+11 3.373e+12 -0.030 0.976258
## V3 6.225e-07 2.063e-07 3.017 0.002549 **
## V4.10th -1.020e+00 1.790e-01 -5.700 1.20e-08 ***
## V4.11th -1.030e+00 1.814e-01 -5.677 1.37e-08 ***
## V4.12th -8.317e-01 2.708e-01 -3.072 0.002128 **
## V4.1st.4th -2.383e+00 7.483e-01 -3.184 0.001453 **
## V4.5th.6th -1.330e+00 3.427e-01 -3.882 0.000104 ***
## V4.7th.8th -1.706e+00 2.248e-01 -7.591 3.18e-14 ***
## V4.9th -1.220e+00 2.454e-01 -4.972 6.62e-07 ***
## V4.Assoc.acdm 1.108e-01 1.159e-01 0.956 0.339220
## V4.Assoc.voc 2.123e-01 1.031e-01 2.060 0.039387 *
## V4.Bachelors 7.471e-01 6.698e-02 11.154 < 2e-16 ***
## V4.Doctorate 2.044e+00 1.955e-01 10.458 < 2e-16 ***
## V4.HS.grad -3.357e-01 6.048e-02 -5.550 2.85e-08 ***
## V4.Masters 1.234e+00 9.712e-02 12.708 < 2e-16 ***
## V4.Preschool -3.280e+01 4.990e+04 -0.001 0.999475
## V4.Prof.school 1.941e+00 1.672e-01 11.610 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.052e-01 1.867e-01 -1.099 0.271725
## V6.Married.AF.spouse 2.342e+00 6.905e-01 3.391 0.000695 ***
## V6.Married.civ.spouse 1.906e+00 3.546e-01 5.373 7.73e-08 ***
## V6.Married.spouse.absent -2.256e-01 3.165e-01 -0.713 0.476062
## V6.Never.married -6.900e-01 1.938e-01 -3.560 0.000371 ***
## V6.Separated -2.544e-01 2.488e-01 -1.022 0.306548
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 8.686e-02 1.186e-01 0.732 0.464145
## V7.Armed.Forces -6.328e-01 1.843e+00 -0.343 0.731268
## V7.Craft.repair 2.720e-01 1.006e-01 2.704 0.006854 **
## V7.Exec.managerial 9.426e-01 1.036e-01 9.094 < 2e-16 ***
## V7.Farming.fishing -6.978e-01 1.656e-01 -4.215 2.50e-05 ***
## V7.Handlers.cleaners -5.443e-01 1.765e-01 -3.084 0.002045 **
## V7.Machine.op.inspct -9.237e-03 1.261e-01 -0.073 0.941622
## V7.Other.service -6.240e-01 1.487e-01 -4.196 2.72e-05 ***
## V7.Priv.house.serv -3.986e+00 1.779e+00 -2.240 0.025085 *
## V7.Prof.specialty 6.232e-01 1.116e-01 5.583 2.37e-08 ***
## V7.Protective.serv 8.390e-01 1.542e-01 5.443 5.25e-08 ***
## V7.Sales 4.484e-01 1.073e-01 4.180 2.92e-05 ***
## V7.Tech.support 7.562e-01 1.423e-01 5.314 1.07e-07 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.391e+00 1.239e-01 -11.234 < 2e-16 ***
## V8.Not.in.family -9.144e-01 3.262e-01 -2.804 0.005053 **
## V8.Other.relative -1.789e+00 3.050e-01 -5.865 4.48e-09 ***
## V8.Own.child -2.218e+00 3.258e-01 -6.809 9.81e-12 ***
## V8.Unmarried -1.154e+00 3.389e-01 -3.404 0.000663 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -5.667e-01 2.717e-01 -2.086 0.036987 *
## V9.Asian.Pac.Islander 2.355e-01 1.888e-01 1.247 0.212284
## V9.Black -1.908e-01 9.215e-02 -2.071 0.038362 *
## V9.Other -5.408e-01 3.570e-01 -1.515 0.129857
## V9.White NA NA NA NA
## V10.Female -8.573e-01 9.640e-02 -8.893 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 3.299e-04 1.249e-05 26.427 < 2e-16 ***
## V12 6.850e-04 4.458e-05 15.367 < 2e-16 ***
## V13 2.745e-02 1.928e-03 14.238 < 2e-16 ***
## V14.. -8.561e-01 7.331e-01 -1.168 0.242854
## V14.Cambodia 1.024e+00 1.201e+00 0.853 0.393858
## V14.Canada -6.150e-01 7.825e-01 -0.786 0.431884
## V14.China -1.618e+00 8.392e-01 -1.928 0.053845 .
## V14.Columbia -2.853e+00 1.102e+00 -2.590 0.009596 **
## V14.Cuba -3.366e-01 8.055e-01 -0.418 0.676041
## V14.Dominican.Republic -1.558e+01 5.124e+02 -0.030 0.975747
## V14.Ecuador -4.222e-01 1.048e+00 -0.403 0.687012
## V14.El.Salvador -1.642e+00 9.100e-01 -1.805 0.071101 .
## V14.England -6.938e-01 8.002e-01 -0.867 0.385949
## V14.France 1.155e-01 9.685e-01 0.119 0.905070
## V14.Germany -3.877e-01 7.768e-01 -0.499 0.617685
## V14.Greece -1.868e+00 9.679e-01 -1.930 0.053667 .
## V14.Guatemala -1.259e+00 1.230e+00 -1.024 0.306049
## V14.Haiti -1.756e+00 1.304e+00 -1.347 0.177919
## V14.Holand.Netherlands -2.348e+01 3.498e+05 0.000 0.999946
## V14.Honduras -2.044e+00 2.826e+00 -0.723 0.469603
## V14.Hong -1.077e+00 1.156e+00 -0.931 0.351711
## V14.Hungary -2.171e-01 1.174e+00 -0.185 0.853253
## V14.India -1.140e+00 7.971e-01 -1.430 0.152592
## V14.Iran -7.421e-01 8.728e-01 -0.850 0.395173
## V14.Ireland 1.776e-01 9.909e-01 0.179 0.857786
## V14.Italy -1.438e-01 8.048e-01 -0.179 0.858227
## V14.Jamaica -4.129e-01 8.575e-01 -0.482 0.630154
## V14.Japan -3.069e-01 8.731e-01 -0.352 0.725187
## V14.Laos -1.260e+00 1.153e+00 -1.093 0.274549
## V14.Mexico -1.222e+00 7.591e-01 -1.610 0.107331
## V14.Nicaragua -1.457e+00 1.075e+00 -1.354 0.175590
## V14.Outlying.US.Guam.USVI.etc. -2.551e+01 1.176e+05 0.000 0.999827
## V14.Peru -2.316e+00 1.358e+00 -1.706 0.087944 .
## V14.Philippines -3.658e-01 7.853e-01 -0.466 0.641382
## V14.Poland -9.308e-01 8.405e-01 -1.107 0.268139
## V14.Portugal -4.015e-01 9.766e-01 -0.411 0.681005
## V14.Puerto.Rico -1.043e+00 8.473e-01 -1.231 0.218294
## V14.Scotland -4.331e-01 1.124e+00 -0.385 0.700074
## V14.South -2.195e+00 8.821e-01 -2.489 0.012820 *
## V14.Taiwan -1.294e+00 9.023e-01 -1.434 0.151698
## V14.Thailand -1.359e+00 1.131e+00 -1.202 0.229535
## V14.Trinadad.Tobago -7.640e-01 1.152e+00 -0.663 0.507367
## V14.United.States -6.141e-01 7.150e-01 -0.859 0.390402
## V14.Vietnam -2.341e+00 1.099e+00 -2.130 0.033147 *
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25165 on 22792 degrees of freedom
## Residual deviance: 14314 on 22693 degrees of freedom
## AIC: 14514
##
## Number of Fisher Scoring iterations: 25
#varImp (adultLrFit)
# Score the test data with the model fitted on the training data
predictions <- predict(
  adultLrFit,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of predicted vs. observed classes on the test data
lr_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
lr_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6930 964
## >50K 486 1388
##
## Accuracy : 0.8516
## 95% CI : (0.8443, 0.8586)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5637
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9345
## Specificity : 0.5901
## Pos Pred Value : 0.8779
## Neg Pred Value : 0.7407
## Prevalence : 0.7592
## Detection Rate : 0.7095
## Detection Prevalence : 0.8081
## Balanced Accuracy : 0.7623
##
## 'Positive' Class : <=50K
##
# Overall test-set metrics from the confusion matrix (accuracy, kappa, CI bounds, p-values)
lr_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.515561e-01 5.637173e-01 8.443495e-01 8.585524e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.269119e-112 5.338073e-36
# Store the overall test accuracy (first entry of $overall), then list the
# per-class metrics.
lr_cf_ov_acc <- lr_cf$overall["Accuracy"]
lr_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9344660 0.5901361 0.8778819
## Neg Pred Value Precision Recall
## 0.7406617 0.8778819 0.9344660
## F1 Prevalence Detection Rate
## 0.9052907 0.7592138 0.7094595
## Detection Prevalence Balanced Accuracy
## 0.8081491 0.7623010
# Entries 5 to 7 of byClass, selected by name: precision, recall and F1
lr_cf_pre_rec_f1 <- lr_cf$byClass[c("Precision", "Recall", "F1")]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Logistic regression for node 1, fitted through caret ("glm" method, binomial
# family) on tda.m_adult_5.50.5.n1.vec with the resampling set in fitControl.
# A plain glm() fit used to be assigned to this same name immediately before
# this call; it was overwritten here without ever being used, so it has been
# removed.
Adult_TDA_PC_5.50.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  method = "glm",
  family = "binomial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit summary for the node-1 model
Adult_TDA_PC_5.50.5_n1_LrFit0
## Generalized Linear Model
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3277, 3279, 3278
## Resampling results:
##
## Accuracy Kappa
## 0.9723412 0.01106462
# Per-fold accuracy and kappa for the node-1 model
Adult_TDA_PC_5.50.5_n1_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.9719512 -0.002338435 Fold1
## 2 0.9737485 0.000000000 Fold2
## 3 0.9713240 0.035532296 Fold3
# Keep the node-1 per-fold accuracies as a one-column data frame, then show
# the coefficient table of the fitted model.
ad_tda_pc_5.50.5_n1_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n1_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (25 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.754e+15 1.035e+08 65242260 <2e-16 ***
## V1 -4.021e+13 1.001e+05 -401739559 <2e-16 ***
## V2.. 1.555e+15 1.144e+07 135880986 <2e-16 ***
## V2.Federal.gov -8.210e+14 6.396e+06 -128368422 <2e-16 ***
## V2.Local.gov -3.322e+14 5.648e+06 -58813598 <2e-16 ***
## V2.Never.worked NA NA NA NA
## V2.Private 2.257e+14 4.636e+06 48689222 <2e-16 ***
## V2.Self.emp.inc -1.030e+15 5.363e+06 -192126826 <2e-16 ***
## V2.Self.emp.not.inc -1.764e+15 5.342e+06 -330201149 <2e-16 ***
## V2.State.gov NA NA NA NA
## V2.Without.pay NA NA NA NA
## V3 1.936e+08 9.807e+00 19741148 <2e-16 ***
## V4.10th 5.343e+14 1.892e+07 28232875 <2e-16 ***
## V4.11th 1.643e+15 2.266e+07 72471752 <2e-16 ***
## V4.12th -5.781e+14 3.020e+07 -19138755 <2e-16 ***
## V4.1st.4th 1.762e+15 6.954e+07 25333567 <2e-16 ***
## V4.5th.6th 1.178e+15 3.942e+07 29884200 <2e-16 ***
## V4.7th.8th 1.870e+15 1.548e+07 120788873 <2e-16 ***
## V4.9th 1.995e+15 3.027e+07 65929241 <2e-16 ***
## V4.Assoc.acdm -6.403e+14 5.949e+06 -107617549 <2e-16 ***
## V4.Assoc.voc -4.419e+14 5.274e+06 -83784354 <2e-16 ***
## V4.Bachelors -9.810e+14 3.195e+06 -307070713 <2e-16 ***
## V4.Doctorate -2.241e+15 5.430e+06 -412621450 <2e-16 ***
## V4.HS.grad 1.710e+14 3.533e+06 48411646 <2e-16 ***
## V4.Masters -1.096e+15 3.832e+06 -286104624 <2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school -1.241e+15 4.949e+06 -250795563 <2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 2.246e+15 6.983e+07 32166187 <2e-16 ***
## V6.Married.AF.spouse 5.415e+15 1.067e+08 50765144 <2e-16 ***
## V6.Married.civ.spouse 3.561e+15 9.547e+07 37296901 <2e-16 ***
## V6.Married.spouse.absent -3.913e+14 9.503e+07 -4116981 <2e-16 ***
## V6.Never.married -1.548e+14 7.763e+07 -1993868 <2e-16 ***
## V6.Separated NA NA NA NA
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -8.947e+13 8.646e+06 -10348260 <2e-16 ***
## V7.Armed.Forces 1.572e+15 6.759e+07 23257677 <2e-16 ***
## V7.Craft.repair -3.669e+12 5.511e+06 -665768 <2e-16 ***
## V7.Exec.managerial 3.655e+14 5.294e+06 69045202 <2e-16 ***
## V7.Farming.fishing -5.738e+14 8.252e+06 -69535707 <2e-16 ***
## V7.Handlers.cleaners 5.386e+14 1.867e+07 28841951 <2e-16 ***
## V7.Machine.op.inspct 7.351e+14 1.057e+07 69529076 <2e-16 ***
## V7.Other.service 2.137e+15 1.938e+07 110275000 <2e-16 ***
## V7.Priv.house.serv NA NA NA NA
## V7.Prof.specialty 3.170e+14 5.589e+06 56730038 <2e-16 ***
## V7.Protective.serv -5.345e+14 7.672e+06 -69670508 <2e-16 ***
## V7.Sales 8.977e+14 5.694e+06 157643421 <2e-16 ***
## V7.Tech.support 7.911e+14 7.616e+06 103877755 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.433e+15 1.969e+07 -72760359 <2e-16 ***
## V8.Not.in.family 1.555e+15 7.050e+07 22057608 <2e-16 ***
## V8.Other.relative -4.701e+13 7.015e+07 -670114 <2e-16 ***
## V8.Own.child NA NA NA NA
## V8.Unmarried -5.367e+14 8.790e+07 -6106524 <2e-16 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 8.136e+14 2.250e+07 36160676 <2e-16 ***
## V9.Asian.Pac.Islander -1.535e+15 9.355e+06 -164127666 <2e-16 ***
## V9.Black -1.255e+15 7.568e+06 -165859104 <2e-16 ***
## V9.Other 1.048e+15 2.296e+07 45649335 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female NA NA NA NA
## V10.Male NA NA NA NA
## V11 1.515e+10 6.103e+01 248215727 <2e-16 ***
## V12 1.548e+11 1.518e+03 101924136 <2e-16 ***
## V13 -4.157e+13 8.947e+04 -464636898 <2e-16 ***
## V14.. -2.127e+14 3.449e+07 -6166348 <2e-16 ***
## V14.Cambodia 1.804e+15 5.903e+07 30563828 <2e-16 ***
## V14.Canada -9.754e+14 3.623e+07 -26924014 <2e-16 ***
## V14.China 3.139e+14 3.899e+07 8050228 <2e-16 ***
## V14.Columbia 2.155e+15 7.535e+07 28607405 <2e-16 ***
## V14.Cuba -1.569e+15 3.936e+07 -39865728 <2e-16 ***
## V14.Dominican.Republic NA NA NA NA
## V14.Ecuador 2.000e+14 5.830e+07 3431112 <2e-16 ***
## V14.El.Salvador 5.352e+14 4.521e+07 11838315 <2e-16 ***
## V14.England 2.845e+13 3.794e+07 749833 <2e-16 ***
## V14.France -3.013e+14 4.125e+07 -7303826 <2e-16 ***
## V14.Germany -3.903e+14 3.639e+07 -10723816 <2e-16 ***
## V14.Greece -3.609e+15 4.359e+07 -82783695 <2e-16 ***
## V14.Guatemala NA NA NA NA
## V14.Haiti NA NA NA NA
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras NA NA NA NA
## V14.Hong 2.628e+14 4.616e+07 5693624 <2e-16 ***
## V14.Hungary 6.213e+14 5.836e+07 10646011 <2e-16 ***
## V14.India 3.249e+13 3.665e+07 886349 <2e-16 ***
## V14.Iran -3.496e+15 3.831e+07 -91256665 <2e-16 ***
## V14.Ireland -2.987e+15 5.141e+07 -58110019 <2e-16 ***
## V14.Italy 3.034e+13 3.807e+07 797040 <2e-16 ***
## V14.Jamaica 2.904e+15 7.572e+07 38349803 <2e-16 ***
## V14.Japan 5.594e+14 3.867e+07 14465190 <2e-16 ***
## V14.Laos 2.427e+15 7.625e+07 31826490 <2e-16 ***
## V14.Mexico 1.998e+14 3.917e+07 5100435 <2e-16 ***
## V14.Nicaragua NA NA NA NA
## V14.Outlying.US.Guam.USVI.etc. NA NA NA NA
## V14.Peru -1.239e+15 7.521e+07 -16478711 <2e-16 ***
## V14.Philippines -1.491e+15 3.695e+07 -40349667 <2e-16 ***
## V14.Poland -2.029e+14 4.348e+07 -4666562 <2e-16 ***
## V14.Portugal 1.810e+13 5.836e+07 310228 <2e-16 ***
## V14.Puerto.Rico -5.417e+14 4.800e+07 -11284445 <2e-16 ***
## V14.Scotland 2.444e+15 7.521e+07 32494881 <2e-16 ***
## V14.South -2.945e+14 4.099e+07 -7184583 <2e-16 ***
## V14.Taiwan 1.731e+15 3.897e+07 44411006 <2e-16 ***
## V14.Thailand 1.398e+13 5.903e+07 236749 <2e-16 ***
## V14.Trinadad.Tobago NA NA NA NA
## V14.United.States -8.790e+14 3.373e+07 -26058180 <2e-16 ***
## V14.Vietnam 3.345e+15 7.597e+07 44023473 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1208.3 on 4916 degrees of freedom
## Residual deviance: 9587.6 on 4833 degrees of freedom
## AIC: 9755.6
##
## Number of Fisher Scoring iterations: 25
# Score the test data with the node-1 model (Adult_TDA_PC_5.50.5_n1_LrFit0)
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-1 predictions vs. the observed test classes
ad_tda_pc_5.50.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 38 21
## >50K 7378 2331
##
## Accuracy : 0.2425
## 95% CI : (0.2341, 0.2512)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0018
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.005124
## Specificity : 0.991071
## Pos Pred Value : 0.644068
## Neg Pred Value : 0.240087
## Prevalence : 0.759214
## Detection Rate : 0.003890
## Detection Prevalence : 0.006040
## Balanced Accuracy : 0.498098
##
## 'Positive' Class : <=50K
##
# Overall test-set metrics for the node-1 model (accuracy, kappa, CI bounds,
# p-values). The confusion matrix itself is printed once, right after it is
# built; the redundant second print that used to sit here has been dropped.
ad_tda_pc_5.50.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.242526618 -0.001839738 0.234053383 0.251153627 0.759213759
## AccuracyPValue McnemarPValue
## 1.000000000 0.000000000
# Store the node-1 overall test accuracy, then list the per-class metrics
ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n1_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n1_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.005124056 0.991071429 0.644067797
## Neg Pred Value Precision Recall
## 0.240086518 0.644067797 0.005124056
## F1 Prevalence Detection Rate
## 0.010167224 0.759213759 0.003890254
## Detection Prevalence Balanced Accuracy
## 0.006040131 0.498097742
# Entries 5 to 7 of byClass, selected by name: precision, recall and F1
ad_tda_pc_5.50.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: baseline LR minus the node-1 TDA model
diff_tda_pca_5.50.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n1_lr_fit_re
diff_tda_pca_5.50.5_lr_n1_3_fold
## Accuracy
## 1 -0.1214642
## 2 -0.1174894
## 3 -0.1268912
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The two trailing numbers are presumably the ROPE bounds -- confirm against
# the BayesianSignTest definition.
bst_tda_pca_5.50.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight. NOTE(review): probRight is 0 in the rendered
# sign-test output, which is why this ratio prints Inf.
bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same fold-wise differences and the same -0.01 / 0.01 bounds as the sign test
bsr_tda_pca_5.50.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n1_3_fold
## $winLeft
## [1] 0.9917
##
## $winRope
## [1] 0.0083
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument (0.1) is presumably the assumed correlation between folds;
# the last two are the -0.01 / 0.01 bounds -- confirm against the definition.
bct_tda_pca_5.50.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n1_3_fold
## $left
## [1] 0.9996055
##
## $rope
## [1] 0.000110433
##
## $right
## [1] 0.0002840615
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_lr_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled)
#bf_tda_pca_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold))
#bf_tda_pca_5.50.5_lr.n1_3_fold
# One-sample t-test of the same fold differences (default null: mean equals 0)
t.test(x = as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold)
## t = -44.754, df = 2, p-value = 0.0004989
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1336723 -0.1102243
## sample estimates:
## mean of x
## -0.1219483
### Test set diff
# Overall test-set accuracy difference: baseline LR minus the node-1 TDA model
diff_tda_pca_5.50.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n1_test
## Accuracy
## 0.6090295
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Applied to the single test-set difference with the -0.01 / 0.01 bounds
bst_tda_pca_5.50.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight
bst_tda_pca_5.50.5_lr.n1_test_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single test-set difference with the -0.01 / 0.01 bounds
bsr_tda_pca_5.50.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1582333
##
## $winRight
## [1] 0.8417667
# Bayesian Correlated Test
# NOTE(review): the rendered output for this call shows NA for left/rope/right,
# presumably because a single difference has no sample variance -- confirm.
bct_tda_pca_5.50.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n1_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n1_test))
#bf_tda_pca_5.50.5_lr.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n1_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node2
# Logistic regression for node 2, fitted through caret ("glm" method, binomial
# family) on tda.m_adult_5.50.5.n2.vec with the resampling set in fitControl.
Adult_TDA_PC_5.50.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit summary for the node-2 model
Adult_TDA_PC_5.50.5_n2_LrFit0
## Generalized Linear Model
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8138, 8138, 8136
## Resampling results:
##
## Accuracy Kappa
## 0.7145669 0.4268096
# Per-fold accuracy and kappa for the node-2 model
Adult_TDA_PC_5.50.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.7163225 0.4294322 Fold1
## 2 0.7148476 0.4294944 Fold2
## 3 0.7125307 0.4215021 Fold3
# Keep the node-2 per-fold accuracies as a one-column data frame, then show
# the coefficient table of the fitted model.
ad_tda_pc_5.50.5_n2_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n2_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.967e+13 2.870e+13 -6.850e-01 0.493065
## V1 1.336e-02 2.046e-03 6.528e+00 6.67e-11 ***
## V2.. -4.308e+12 8.550e+12 -5.040e-01 0.614376
## V2.Federal.gov -4.308e+12 8.551e+12 -5.040e-01 0.614396
## V2.Local.gov -4.308e+12 8.551e+12 -5.040e-01 0.614388
## V2.Never.worked NA NA NA NA
## V2.Private -4.308e+12 8.551e+12 -5.040e-01 0.614408
## V2.Self.emp.inc -4.308e+12 8.551e+12 -5.040e-01 0.614386
## V2.Self.emp.not.inc -4.308e+12 8.551e+12 -5.040e-01 0.614388
## V2.State.gov -4.308e+12 8.550e+12 -5.040e-01 0.614372
## V2.Without.pay -4.308e+12 8.551e+12 -5.040e-01 0.614382
## V3 1.019e-06 2.175e-07 4.687e+00 2.77e-06 ***
## V4.10th -6.537e-01 1.826e-01 -3.579e+00 0.000345 ***
## V4.11th -7.294e-01 2.044e-01 -3.568e+00 0.000359 ***
## V4.12th -2.521e-01 2.832e-01 -8.900e-01 0.373348
## V4.1st.4th -5.050e-01 5.586e-01 -9.040e-01 0.365952
## V4.5th.6th -5.680e-01 3.741e-01 -1.518e+00 0.128967
## V4.7th.8th -1.287e+00 2.030e-01 -6.339e+00 2.31e-10 ***
## V4.9th -1.013e+00 2.999e-01 -3.376e+00 0.000734 ***
## V4.Assoc.acdm 5.309e-03 1.222e-01 4.300e-02 0.965361
## V4.Assoc.voc -4.145e-02 1.021e-01 -4.060e-01 0.684913
## V4.Bachelors 5.747e-01 6.873e-02 8.361e+00 < 2e-16 ***
## V4.Doctorate 1.132e+00 1.918e-01 5.905e+00 3.53e-09 ***
## V4.HS.grad -3.084e-01 6.013e-02 -5.129e+00 2.91e-07 ***
## V4.Masters 9.349e-01 1.021e-01 9.157e+00 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school 1.073e+00 1.699e-01 6.317e+00 2.67e-10 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Married.AF.spouse 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Married.civ.spouse 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Married.spouse.absent 4.528e+15 3.454e+13 1.311e+02 < 2e-16 ***
## V6.Never.married 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Separated 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Widowed 2.398e+13 3.454e+13 6.940e-01 0.487623
## V7.. NA NA NA NA
## V7.Adm.clerical 6.669e-01 1.361e-01 4.899e+00 9.63e-07 ***
## V7.Armed.Forces -5.528e-01 1.732e+00 -3.190e-01 0.749587
## V7.Craft.repair 1.326e-01 9.167e-02 1.447e+00 0.148034
## V7.Exec.managerial 9.516e-01 9.742e-02 9.768e+00 < 2e-16 ***
## V7.Farming.fishing -6.079e-01 1.485e-01 -4.092e+00 4.27e-05 ***
## V7.Handlers.cleaners 6.106e-02 1.781e-01 3.430e-01 0.731742
## V7.Machine.op.inspct 1.666e-01 1.202e-01 1.386e+00 0.165808
## V7.Other.service 1.136e-02 1.794e-01 6.300e-02 0.949500
## V7.Priv.house.serv -2.811e+01 3.621e+05 0.000e+00 0.999938
## V7.Prof.specialty 6.883e-01 1.073e-01 6.417e+00 1.39e-10 ***
## V7.Protective.serv 6.582e-01 1.474e-01 4.466e+00 7.97e-06 ***
## V7.Sales 4.691e-01 1.005e-01 4.669e+00 3.03e-06 ***
## V7.Tech.support 9.348e-01 1.447e-01 6.462e+00 1.03e-10 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband 4.404e-01 1.426e+00 3.090e-01 0.757500
## V8.Not.in.family 1.221e+00 1.601e+00 7.630e-01 0.445618
## V8.Other.relative 7.811e-01 1.527e+00 5.120e-01 0.608889
## V8.Own.child 1.533e+00 1.591e+00 9.630e-01 0.335406
## V8.Unmarried 2.370e+01 3.638e+04 1.000e-03 0.999480
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -4.932e-01 3.265e-01 -1.511e+00 0.130889
## V9.Asian.Pac.Islander 2.948e-01 2.032e-01 1.450e+00 0.146939
## V9.Black 9.881e-01 1.415e-01 6.985e+00 2.84e-12 ***
## V9.Other 4.578e-01 4.690e-01 9.760e-01 0.329059
## V9.White NA NA NA NA
## V10.Female 4.686e+00 1.511e+00 3.101e+00 0.001930 **
## V10.Male NA NA NA NA
## V11 2.798e-04 1.449e-05 1.931e+01 < 2e-16 ***
## V12 5.656e-04 4.669e-05 1.211e+01 < 2e-16 ***
## V13 2.005e-02 2.014e-03 9.957e+00 < 2e-16 ***
## V14.. -3.265e-01 6.869e-01 -4.750e-01 0.634603
## V14.Cambodia 1.581e+00 1.115e+00 1.418e+00 0.156332
## V14.Canada 3.195e-01 7.366e-01 4.340e-01 0.664429
## V14.China -1.030e+00 8.133e-01 -1.267e+00 0.205153
## V14.Columbia -2.191e+00 1.156e+00 -1.895e+00 0.058028 .
## V14.Cuba 5.253e-01 7.717e-01 6.810e-01 0.496104
## V14.Dominican.Republic -2.697e+01 2.279e+05 0.000e+00 0.999906
## V14.Ecuador -2.249e-01 1.043e+00 -2.160e-01 0.829290
## V14.El.Salvador -3.510e-01 9.132e-01 -3.840e-01 0.700685
## V14.England 2.505e-01 7.825e-01 3.200e-01 0.748876
## V14.France 5.230e-01 1.010e+00 5.180e-01 0.604632
## V14.Germany 4.033e-01 7.343e-01 5.490e-01 0.582832
## V14.Greece -1.370e+00 9.195e-01 -1.490e+00 0.136305
## V14.Guatemala -1.322e+00 1.963e+00 -6.740e-01 0.500601
## V14.Haiti 1.385e-01 1.430e+00 9.700e-02 0.922871
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 4.504e+15 6.711e+07 6.711e+07 < 2e-16 ***
## V14.Hong -1.354e-01 1.065e+00 -1.270e-01 0.898818
## V14.Hungary -3.015e-02 1.194e+00 -2.500e-02 0.979847
## V14.India -9.204e-01 7.529e-01 -1.222e+00 0.221573
## V14.Iran -6.423e-02 8.333e-01 -7.700e-02 0.938563
## V14.Ireland 1.402e+00 1.331e+00 1.054e+00 0.291937
## V14.Italy 1.437e-01 7.628e-01 1.880e-01 0.850565
## V14.Jamaica -2.473e-01 9.478e-01 -2.610e-01 0.794146
## V14.Japan -4.180e-01 8.248e-01 -5.070e-01 0.612321
## V14.Laos 2.603e+01 4.776e+05 0.000e+00 0.999957
## V14.Mexico 1.362e-01 7.326e-01 1.860e-01 0.852530
## V14.Nicaragua -1.396e+00 1.467e+00 -9.520e-01 0.341303
## V14.Outlying.US.Guam.USVI.etc. -2.492e+01 2.745e+05 0.000e+00 0.999928
## V14.Peru 2.271e-01 1.315e+00 1.730e-01 0.862897
## V14.Philippines 5.721e-01 7.622e-01 7.510e-01 0.452892
## V14.Poland -1.261e-01 8.095e-01 -1.560e-01 0.876255
## V14.Portugal -3.059e-01 1.145e+00 -2.670e-01 0.789288
## V14.Puerto.Rico -4.806e-01 9.040e-01 -5.320e-01 0.594987
## V14.Scotland 7.373e-01 1.418e+00 5.200e-01 0.603005
## V14.South -9.358e-01 8.345e-01 -1.121e+00 0.262128
## V14.Taiwan -5.569e-01 8.562e-01 -6.500e-01 0.515408
## V14.Thailand -6.617e-02 1.450e+00 -4.600e-02 0.963608
## V14.Trinadad.Tobago 2.728e+01 2.628e+05 0.000e+00 0.999917
## V14.United.States 9.008e-02 6.663e-01 1.350e-01 0.892453
## V14.Vietnam -1.255e+00 1.123e+00 -1.118e+00 0.263501
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 16823 on 12205 degrees of freedom
## Residual deviance: 12903 on 12108 degrees of freedom
## AIC: 13099
##
## Number of Fisher Scoring iterations: 25
# Score the test data with the node-2 model (Adult_TDA_PC_5.50.5_n2_LrFit0)
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-2 predictions vs. the observed test classes
ad_tda_pc_5.50.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1776 541
## >50K 5640 1811
##
## Accuracy : 0.3672
## 95% CI : (0.3576, 0.3769)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0054
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2395
## Specificity : 0.7700
## Pos Pred Value : 0.7665
## Neg Pred Value : 0.2431
## Prevalence : 0.7592
## Detection Rate : 0.1818
## Detection Prevalence : 0.2372
## Balanced Accuracy : 0.5047
##
## 'Positive' Class : <=50K
##
# Overall test-set metrics for the node-2 model (accuracy, kappa, CI bounds,
# p-values). The confusion matrix itself is printed once, right after it is
# built; the redundant second print that used to sit here has been dropped.
ad_tda_pc_5.50.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.367219492 0.005439181 0.357649584 0.376868634 0.759213759
## AccuracyPValue McnemarPValue
## 1.000000000 0.000000000
# Store the node-2 overall test accuracy, then list the per-class metrics
ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n2_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n2_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.2394822 0.7699830 0.7665084
## Neg Pred Value Precision Recall
## 0.2430546 0.7665084 0.2394822
## F1 Prevalence Detection Rate
## 0.3649440 0.7592138 0.1818182
## Detection Prevalence Balanced Accuracy
## 0.2372031 0.5047326
# Entries 5 to 7 of byClass, selected by name: precision, recall and F1
ad_tda_pc_5.50.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: baseline LR minus the node-2 TDA model
diff_tda_pca_5.50.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n2_lr_fit_re
diff_tda_pca_5.50.5_lr_n2_3_fold
## Accuracy
## 1 0.1341645
## 2 0.1414115
## 3 0.1319020
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The two trailing numbers are presumably the ROPE bounds -- confirm against
# the BayesianSignTest definition.
bst_tda_pca_5.50.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight
bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Same fold-wise differences and the same -0.01 / 0.01 bounds as the sign test
bsr_tda_pca_5.50.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009266667
##
## $winRight
## [1] 0.9907333
# Bayesian Correlated Test
# Second argument (0.1) is presumably the assumed correlation between folds;
# the last two are the -0.01 / 0.01 bounds -- confirm against the definition.
bct_tda_pca_5.50.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n2_3_fold
## $left
## [1] 0.0002576845
##
## $rope
## [1] 8.833637e-05
##
## $right
## [1] 0.999654
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_lr_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled)
#bf_tda_pca_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold))
#bf_tda_pca_5.50.5_lr.n2_3_fold
# One-sample t-test of the same fold differences (default null: mean equals 0)
t.test(x = as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold)
## t = 47.358, df = 2, p-value = 0.0004456
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1234856 0.1481664
## sample estimates:
## mean of x
## 0.135826
### Test-set accuracy difference (lr_cf_ov_acc minus Node 2 TDA-assisted accuracy)
diff_tda_pca_5.50.5_lr.n2_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n2_test
## Accuracy
## 0.4843366
## Bayesian tests on the test-set difference (Node 2)
# Bayesian sign test; note the input here is a single accuracy difference
bst_tda_pca_5.50.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds of "left" against "right" (probLeft / probRight)
bst_tda_pca_5.50.5_lr.n2_test_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n2_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, same ROPE limits
bsr_tda_pca_5.50.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1597333
##
## $winRight
## [1] 0.8402667
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single value, and the printed result is NA for
# left / rope / right -- this test is not informative for one observation.
bct_tda_pca_5.50.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n2_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n2_test))
#bf_tda_pca_5.50.5_lr.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n2_test))
## Node 3
# Logistic regression (glm, binomial family) trained on the Node 3 data set,
# resampled according to fitControl and scored by accuracy.
Adult_TDA_PC_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted model: sample/predictor counts, resampling scheme and the
# cross-validated accuracy / kappa
Adult_TDA_PC_5.50.5_n3_LrFit0
## Generalized Linear Model
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8828, 8826, 8826
## Resampling results:
##
## Accuracy Kappa
## 0.8289284 0.4269351
# Per-fold cross-validation accuracy / kappa of the Node 3 model
Adult_TDA_PC_5.50.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8349955 0.4419374 Fold1
## 2 0.8253285 0.4083828 Fold2
## 3 0.8264613 0.4304852 Fold3
# Keep the per-fold accuracy column of the *Node 3* model.
# This line previously read Adult_TDA_PC_5.50.5_n2_LrFit0 (copy/paste from the
# Node 2 section), so every Node 3 3-fold comparison silently reused the
# Node 2 fold accuracies.
# NOTE: the knitted Node 3 3-fold outputs further down (differences, Bayesian
# tests, ROPE plot, t-test) were produced with the old assignment and must be
# regenerated by re-knitting.
ad_tda_pc_5.50.5_n3_lr_fit_re <- Adult_TDA_PC_5.50.5_n3_LrFit0$resample[1]
# Coefficient table and deviance summary of the underlying glm fit
summary(Adult_TDA_PC_5.50.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.180e+13 8.650e+12 -1.364 0.172639
## V1 2.544e-03 2.296e-03 1.108 0.267922
## V2.. 1.180e+13 8.650e+12 1.364 0.172639
## V2.Federal.gov 1.180e+13 8.650e+12 1.364 0.172639
## V2.Local.gov 1.180e+13 8.650e+12 1.364 0.172639
## V2.Never.worked NA NA NA NA
## V2.Private 1.180e+13 8.650e+12 1.364 0.172639
## V2.Self.emp.inc 1.180e+13 8.650e+12 1.364 0.172639
## V2.Self.emp.not.inc 1.180e+13 8.650e+12 1.364 0.172639
## V2.State.gov 1.180e+13 8.650e+12 1.364 0.172639
## V2.Without.pay 1.180e+13 8.650e+12 1.364 0.172639
## V3 1.127e-06 2.361e-07 4.773 1.81e-06 ***
## V4.10th -3.082e-01 1.751e-01 -1.760 0.078340 .
## V4.11th -4.031e-01 1.777e-01 -2.269 0.023258 *
## V4.12th 1.067e-01 2.582e-01 0.413 0.679442
## V4.1st.4th -9.017e-01 4.887e-01 -1.845 0.065014 .
## V4.5th.6th -9.334e-01 3.349e-01 -2.787 0.005318 **
## V4.7th.8th -1.341e+00 2.545e-01 -5.269 1.37e-07 ***
## V4.9th -9.128e-01 2.554e-01 -3.575 0.000351 ***
## V4.Assoc.acdm -5.927e-01 1.449e-01 -4.092 4.28e-05 ***
## V4.Assoc.voc -4.086e-01 1.262e-01 -3.236 0.001210 **
## V4.Bachelors -6.358e-01 8.503e-02 -7.478 7.57e-14 ***
## V4.Doctorate 1.530e-01 2.176e-01 0.703 0.482035
## V4.HS.grad -3.751e-01 6.898e-02 -5.437 5.42e-08 ***
## V4.Masters -4.995e-01 1.200e-01 -4.163 3.14e-05 ***
## V4.Preschool -3.157e+01 6.581e+04 0.000 0.999617
## V4.Prof.school -7.407e-02 1.962e-01 -0.378 0.705726
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -5.751e-01 1.975e-01 -2.912 0.003586 **
## V6.Married.AF.spouse 1.453e+00 8.056e-01 1.804 0.071232 .
## V6.Married.civ.spouse -2.388e-01 3.406e-01 -0.701 0.483250
## V6.Married.spouse.absent -4.412e-01 3.077e-01 -1.434 0.151629
## V6.Never.married -3.763e-01 2.065e-01 -1.822 0.068456 .
## V6.Separated -3.602e-01 2.592e-01 -1.389 0.164685
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 1.219e+00 1.374e-01 8.872 < 2e-16 ***
## V7.Armed.Forces -2.519e+01 2.088e+05 0.000 0.999904
## V7.Craft.repair 1.752e-01 1.230e-01 1.425 0.154147
## V7.Exec.managerial 5.016e-01 1.296e-01 3.871 0.000108 ***
## V7.Farming.fishing -1.295e+00 2.989e-01 -4.331 1.48e-05 ***
## V7.Handlers.cleaners 4.344e-01 1.723e-01 2.522 0.011685 *
## V7.Machine.op.inspct 6.695e-01 1.368e-01 4.895 9.84e-07 ***
## V7.Other.service 3.295e-01 1.593e-01 2.069 0.038550 *
## V7.Priv.house.serv -2.618e+00 7.482e+00 -0.350 0.726442
## V7.Prof.specialty 4.234e-01 1.366e-01 3.100 0.001937 **
## V7.Protective.serv 2.434e-01 2.090e-01 1.164 0.244225
## V7.Sales 7.925e-01 1.283e-01 6.175 6.61e-10 ***
## V7.Tech.support 1.111e+00 1.648e-01 6.740 1.58e-11 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -8.944e-01 1.333e-01 -6.709 1.97e-11 ***
## V8.Not.in.family 3.686e-02 3.046e-01 0.121 0.903684
## V8.Other.relative -4.079e-01 2.831e-01 -1.441 0.149643
## V8.Own.child -4.954e-01 2.973e-01 -1.666 0.095670 .
## V8.Unmarried 1.740e-01 3.193e-01 0.545 0.585776
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 3.932e-01 2.667e-01 1.474 0.140444
## V9.Asian.Pac.Islander 6.353e-01 2.048e-01 3.101 0.001926 **
## V9.Black 9.496e-01 9.520e-02 9.975 < 2e-16 ***
## V9.Other 4.337e-01 3.313e-01 1.309 0.190520
## V9.White NA NA NA NA
## V10.Female 1.575e+00 1.125e-01 13.996 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 2.870e-04 1.397e-05 20.547 < 2e-16 ***
## V12 2.752e-04 5.333e-05 5.161 2.46e-07 ***
## V13 4.299e-03 2.299e-03 1.870 0.061477 .
## V14.. -1.261e+00 8.687e-01 -1.451 0.146647
## V14.Cambodia -2.165e-02 1.122e+00 -0.019 0.984608
## V14.Canada -1.081e+00 9.282e-01 -1.165 0.244126
## V14.China -2.365e+00 1.014e+00 -2.333 0.019646 *
## V14.Columbia -2.434e+00 1.350e+00 -1.803 0.071370 .
## V14.Cuba -2.517e-01 9.402e-01 -0.268 0.788946
## V14.Dominican.Republic -2.448e+00 1.379e+00 -1.775 0.075862 .
## V14.Ecuador -3.383e-01 1.170e+00 -0.289 0.772471
## V14.El.Salvador -1.134e+00 1.040e+00 -1.090 0.275655
## V14.England -5.701e-01 9.346e-01 -0.610 0.541868
## V14.France -8.438e-01 1.125e+00 -0.750 0.453229
## V14.Germany -3.181e-01 9.164e-01 -0.347 0.728528
## V14.Greece -2.236e+00 1.227e+00 -1.823 0.068329 .
## V14.Guatemala -9.200e-01 1.120e+00 -0.821 0.411456
## V14.Haiti -5.254e-01 1.101e+00 -0.477 0.633061
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 2.462e+01 4.015e+05 0.000 0.999951
## V14.Hong -1.924e+00 1.476e+00 -1.303 0.192448
## V14.Hungary -1.522e+00 1.448e+00 -1.051 0.293204
## V14.India -1.700e+00 9.754e-01 -1.743 0.081350 .
## V14.Iran -1.922e+00 1.135e+00 -1.693 0.090511 .
## V14.Ireland -8.795e-01 1.248e+00 -0.705 0.480868
## V14.Italy -6.023e-01 9.656e-01 -0.624 0.532777
## V14.Jamaica -5.346e-01 9.737e-01 -0.549 0.582947
## V14.Japan -8.683e-01 1.004e+00 -0.865 0.387015
## V14.Laos -2.194e+00 1.435e+00 -1.528 0.126423
## V14.Mexico -1.498e+00 8.854e-01 -1.691 0.090751 .
## V14.Nicaragua -1.320e+00 1.182e+00 -1.117 0.264098
## V14.Outlying.US.Guam.USVI.etc. -2.500e+01 1.811e+05 0.000 0.999890
## V14.Peru -1.708e+00 1.441e+00 -1.185 0.235994
## V14.Philippines -6.762e-01 9.112e-01 -0.742 0.458005
## V14.Poland -9.216e-01 9.850e-01 -0.936 0.349454
## V14.Portugal -1.939e+00 1.388e+00 -1.397 0.162338
## V14.Puerto.Rico -1.003e+00 9.656e-01 -1.038 0.299097
## V14.Scotland -7.464e-01 1.355e+00 -0.551 0.581801
## V14.South -1.846e+00 1.019e+00 -1.812 0.070008 .
## V14.Taiwan -1.104e+00 1.055e+00 -1.047 0.295222
## V14.Thailand -1.627e+00 1.500e+00 -1.085 0.277885
## V14.Trinadad.Tobago -9.675e-01 1.215e+00 -0.796 0.425941
## V14.United.States -9.622e-01 8.492e-01 -1.133 0.257193
## V14.Vietnam -1.750e+00 1.071e+00 -1.634 0.102245
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 14233 on 13239 degrees of freedom
## Residual deviance: 10681 on 13142 degrees of freedom
## AIC: 10877
##
## Number of Fisher Scoring iterations: 25
# Apply the Node 3 model (fitted on training data) to the held-out test data
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of predicted vs. observed test labels
ad_tda_pc_5.50.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5118 1752
## >50K 2298 600
##
## Accuracy : 0.5854
## 95% CI : (0.5755, 0.5952)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0507
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6901
## Specificity : 0.2551
## Pos Pred Value : 0.7450
## Neg Pred Value : 0.2070
## Prevalence : 0.7592
## Detection Rate : 0.5240
## Detection Prevalence : 0.7033
## Balanced Accuracy : 0.4726
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix object; the output
# below repeats the block printed right after it was created.
ad_tda_pc_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5118 1752
## >50K 2298 600
##
## Accuracy : 0.5854
## 95% CI : (0.5755, 0.5952)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0507
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6901
## Specificity : 0.2551
## Pos Pred Value : 0.7450
## Neg Pred Value : 0.2070
## Prevalence : 0.7592
## Detection Rate : 0.5240
## Detection Prevalence : 0.7033
## Balanced Accuracy : 0.4726
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the Node 3 TDA-assisted LR confusion matrix
ad_tda_pc_5.50.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.853808e-01 -5.074639e-02 5.755355e-01 5.951752e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.091610e-17
# Keep the accuracy entry (first element of `overall`) for the test-set comparison
ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_lr_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...)
ad_tda_pc_5.50.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.6901294 0.2551020 0.7449782
## Neg Pred Value Precision Recall
## 0.2070393 0.7449782 0.6901294
## F1 Prevalence Detection Rate
## 0.7165057 0.7592138 0.5239558
## Detection Prevalence Balanced Accuracy
## 0.7033170 0.4726157
# Precision, recall and F1 sit at positions 5-7 of `byClass`
ad_tda_pc_5.50.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR (Node 3)
### Per-fold accuracy difference across the 3 CV folds (ad_lr_fit_re minus Node 3)
# NOTE(review): the differences printed below are identical to the Node 2
# values, i.e. they were computed from Node 2 fold accuracies; check how
# ad_tda_pc_5.50.5_n3_lr_fit_re is built and re-knit.
diff_tda_pca_5.50.5_lr_n3_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n3_lr_fit_re
diff_tda_pca_5.50.5_lr_n3_3_fold
## Accuracy
## 1 0.1341645
## 2 0.1414115
## 3 0.1319020
## Bayesian tests on the 3-fold differences (Node 3)
# Bayesian sign test; -0.01 / 0.01 are the lower / upper ROPE limits
bst_tda_pca_5.50.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Sign-test odds of "left" against "right" (probLeft / probRight)
bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same differences, same ROPE limits
bsr_tda_pca_5.50.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009433333
##
## $winRight
## [1] 0.9905667
# Bayesian correlated t-test on the 3-fold differences
# (second argument 0.1 is presumably the fold correlation -- confirm against
#  the definition of correlatedBayesianTtest; last two are the ROPE limits)
bct_tda_pca_5.50.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n3_3_fold
## $left
## [1] 0.0002576845
##
## $rope
## [1] 8.833637e-05
##
## $right
## [1] 0.999654
# ROPE plot of the 3-fold differences over [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_lr_n3_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold))
#bf_tda_pca_5.50.5_lr.n3_3_fold
# Frequentist one-sample t-test of the fold differences against 0
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold)
## t = 47.358, df = 2, p-value = 0.0004456
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1234856 0.1481664
## sample estimates:
## mean of x
## 0.135826
### Test-set accuracy difference (lr_cf_ov_acc minus Node 3 TDA-assisted accuracy)
diff_tda_pca_5.50.5_lr.n3_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n3_test
## Accuracy
## 0.2661753
## Bayesian tests on the test-set difference (Node 3)
# Bayesian sign test; note the input here is a single accuracy difference
bst_tda_pca_5.50.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds of "left" against "right" (probLeft / probRight)
bst_tda_pca_5.50.5_lr.n3_test_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, same ROPE limits
bsr_tda_pca_5.50.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1579667
##
## $winRight
## [1] 0.8420333
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single value, and the printed result is NA for
# left / rope / right -- this test is not informative for one observation.
bct_tda_pca_5.50.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n3_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n3_test))
#bf_tda_pca_5.50.5_lr.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n3_test))
## Node 4
# Logistic regression (glm, binomial family) trained on the Node 4 data set,
# resampled according to fitControl and scored by accuracy.
Adult_TDA_PC_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted model: sample/predictor counts, resampling scheme and the
# cross-validated accuracy / kappa
Adult_TDA_PC_5.50.5_n4_LrFit0
## Generalized Linear Model
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11134, 11133, 11133
## Resampling results:
##
## Accuracy Kappa
## 0.6692921 0.03635732
# Per-fold cross-validation accuracy / kappa of the Node 4 model
Adult_TDA_PC_5.50.5_n4_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8474668 0.04113347 Fold1
## 2 0.8491108 0.05403147 Fold2
## 3 0.3112987 0.01390702 Fold3
# Keep the per-fold accuracy column (as a one-column data frame) for the
# 3-fold comparison against the plain LR model
ad_tda_pc_5.50.5_n4_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n4_LrFit0$resample["Accuracy"]
# Coefficient table and deviance summary of the underlying glm fit
summary(Adult_TDA_PC_5.50.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.026e+12 1.131e+13 0.445 0.656642
## V1 2.109e-02 3.735e-03 5.647 1.63e-08 ***
## V2.. -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Federal.gov -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Local.gov -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Never.worked -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Private -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Self.emp.inc -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Self.emp.not.inc -5.026e+12 1.131e+13 -0.445 0.656642
## V2.State.gov -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Without.pay -5.026e+12 1.131e+13 -0.445 0.656642
## V3 7.685e-07 3.631e-07 2.116 0.034327 *
## V4.10th -1.539e+00 4.888e-01 -3.148 0.001642 **
## V4.11th -3.349e-01 3.036e-01 -1.103 0.270076
## V4.12th -8.699e-01 4.911e-01 -1.771 0.076505 .
## V4.1st.4th -2.287e+01 2.637e+04 -0.001 0.999308
## V4.5th.6th -9.827e-01 6.698e-01 -1.467 0.142306
## V4.7th.8th -9.913e-01 4.586e-01 -2.162 0.030647 *
## V4.9th -3.022e-01 4.128e-01 -0.732 0.464162
## V4.Assoc.acdm -1.521e-02 1.939e-01 -0.078 0.937472
## V4.Assoc.voc -2.068e-02 1.890e-01 -0.109 0.912866
## V4.Bachelors 3.228e-01 1.220e-01 2.646 0.008142 **
## V4.Doctorate 1.209e+00 3.656e-01 3.307 0.000943 ***
## V4.HS.grad -4.158e-01 1.105e-01 -3.763 0.000168 ***
## V4.Masters 4.200e-01 1.832e-01 2.292 0.021895 *
## V4.Preschool -2.166e+02 1.119e+07 0.000 0.999985
## V4.Prof.school 5.598e-01 3.690e-01 1.517 0.129205
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 1.284e-02 1.847e-01 0.070 0.944562
## V6.Married.AF.spouse 3.113e+00 7.265e-01 4.285 1.83e-05 ***
## V6.Married.civ.spouse 1.928e+00 4.073e-01 4.734 2.20e-06 ***
## V6.Married.spouse.absent 4.426e-02 3.222e-01 0.137 0.890729
## V6.Never.married -2.178e-01 2.053e-01 -1.061 0.288801
## V6.Separated -3.133e-01 2.699e-01 -1.161 0.245690
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 5.661e-02 2.573e-01 0.220 0.825873
## V7.Armed.Forces -2.308e+01 1.302e+05 0.000 0.999859
## V7.Craft.repair 1.553e-01 2.726e-01 0.570 0.568930
## V7.Exec.managerial 2.120e-01 2.630e-01 0.806 0.420201
## V7.Farming.fishing -2.666e+00 1.019e+00 -2.615 0.008920 **
## V7.Handlers.cleaners -7.790e-01 4.203e-01 -1.853 0.063838 .
## V7.Machine.op.inspct -7.407e-01 3.248e-01 -2.280 0.022592 *
## V7.Other.service -5.282e-01 2.812e-01 -1.878 0.060362 .
## V7.Priv.house.serv -4.245e+00 2.429e+00 -1.748 0.080495 .
## V7.Prof.specialty 3.726e-02 2.693e-01 0.138 0.889934
## V7.Protective.serv 7.033e-01 3.679e-01 1.912 0.055919 .
## V7.Sales 4.649e-02 2.667e-01 0.174 0.861630
## V7.Tech.support 2.779e-01 3.017e-01 0.921 0.357044
## V7.Transport.moving NA NA NA NA
## V8.Husband 4.241e+02 3.242e+05 0.001 0.998956
## V8.Not.in.family -1.275e-01 3.771e-01 -0.338 0.735328
## V8.Other.relative -1.607e+00 3.870e-01 -4.153 3.28e-05 ***
## V8.Own.child -1.321e+00 3.610e-01 -3.658 0.000254 ***
## V8.Unmarried -1.855e-01 3.874e-01 -0.479 0.632133
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 2.881e-01 3.548e-01 0.812 0.416757
## V9.Asian.Pac.Islander 4.901e-01 2.870e-01 1.707 0.087742 .
## V9.Black 1.134e-01 1.326e-01 0.855 0.392497
## V9.Other 6.152e-02 4.913e-01 0.125 0.900359
## V9.White NA NA NA NA
## V10.Female 2.783e-01 1.074e-01 2.590 0.009585 **
## V10.Male NA NA NA NA
## V11 3.618e-04 1.712e-05 21.131 < 2e-16 ***
## V12 3.504e-04 9.101e-05 3.850 0.000118 ***
## V13 2.382e-02 3.499e-03 6.809 9.82e-12 ***
## V14.. -2.180e+00 1.310e+00 -1.665 0.095997 .
## V14.Cambodia -2.357e+01 4.374e+04 -0.001 0.999570
## V14.Canada -2.264e+00 1.406e+00 -1.610 0.107299
## V14.China -2.090e+00 1.447e+00 -1.444 0.148672
## V14.Columbia -2.527e+01 5.220e+04 0.000 0.999614
## V14.Cuba -2.811e+00 1.498e+00 -1.877 0.060565 .
## V14.Dominican.Republic -2.739e+00 1.671e+00 -1.639 0.101231
## V14.Ecuador -2.487e+01 8.388e+04 0.000 0.999763
## V14.El.Salvador -2.753e+00 1.689e+00 -1.630 0.103163
## V14.England -2.255e+00 1.407e+00 -1.603 0.108979
## V14.France -2.673e+00 1.776e+00 -1.505 0.132281
## V14.Germany -2.223e+00 1.377e+00 -1.614 0.106540
## V14.Greece -1.724e+00 1.752e+00 -0.984 0.325093
## V14.Guatemala -4.375e-01 1.506e+00 -0.291 0.771422
## V14.Haiti -2.640e+00 1.704e+00 -1.549 0.121412
## V14.Holand.Netherlands -2.397e+01 3.370e+05 0.000 0.999943
## V14.Honduras -2.508e+01 1.011e+05 0.000 0.999802
## V14.Hong -2.605e+01 8.869e+04 0.000 0.999766
## V14.Hungary -1.846e+00 1.721e+00 -1.073 0.283395
## V14.India -2.515e+00 1.526e+00 -1.648 0.099286 .
## V14.Iran -2.600e+01 8.747e+04 0.000 0.999763
## V14.Ireland -1.923e+00 1.673e+00 -1.150 0.250346
## V14.Italy -1.033e+00 1.412e+00 -0.732 0.464434
## V14.Jamaica -1.851e+00 1.526e+00 -1.212 0.225324
## V14.Japan -1.023e+00 1.404e+00 -0.729 0.466262
## V14.Laos -2.286e+00 1.742e+00 -1.312 0.189437
## V14.Mexico -2.746e+00 1.369e+00 -2.006 0.044837 *
## V14.Nicaragua -1.642e+00 1.723e+00 -0.953 0.340633
## V14.Outlying.US.Guam.USVI.etc. -2.619e+01 9.667e+04 0.000 0.999784
## V14.Peru -2.527e+01 7.152e+04 0.000 0.999718
## V14.Philippines -2.303e+00 1.370e+00 -1.681 0.092735 .
## V14.Poland -2.459e+00 1.651e+00 -1.489 0.136359
## V14.Portugal -1.331e+00 1.537e+00 -0.866 0.386319
## V14.Puerto.Rico -2.014e+00 1.384e+00 -1.455 0.145592
## V14.Scotland -2.578e+00 1.821e+00 -1.416 0.156909
## V14.South -3.242e+00 1.538e+00 -2.108 0.035001 *
## V14.Taiwan -1.898e+00 1.527e+00 -1.243 0.213856
## V14.Thailand -2.681e+01 1.114e+05 0.000 0.999808
## V14.Trinadad.Tobago -2.591e+01 1.070e+05 0.000 0.999807
## V14.United.States -2.274e+00 1.281e+00 -1.776 0.075762 .
## V14.Vietnam -2.216e+00 1.562e+00 -1.419 0.155991
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7122.1 on 16699 degrees of freedom
## Residual deviance: 130876.7 on 16600 degrees of freedom
## AIC: 131077
##
## Number of Fisher Scoring iterations: 25
# Apply the Node 4 model (fitted on training data) to the held-out test data
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of predicted vs. observed test labels
ad_tda_pc_5.50.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5178 443
## >50K 2238 1909
##
## Accuracy : 0.7255
## 95% CI : (0.7166, 0.7344)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.4045
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6982
## Specificity : 0.8116
## Pos Pred Value : 0.9212
## Neg Pred Value : 0.4603
## Prevalence : 0.7592
## Detection Rate : 0.5301
## Detection Prevalence : 0.5755
## Balanced Accuracy : 0.7549
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix object; the output
# below repeats the block printed right after it was created.
ad_tda_pc_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5178 443
## >50K 2238 1909
##
## Accuracy : 0.7255
## 95% CI : (0.7166, 0.7344)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.4045
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6982
## Specificity : 0.8116
## Pos Pred Value : 0.9212
## Neg Pred Value : 0.4603
## Prevalence : 0.7592
## Detection Rate : 0.5301
## Detection Prevalence : 0.5755
## Balanced Accuracy : 0.7549
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the Node 4 TDA-assisted LR confusion matrix
ad_tda_pc_5.50.5_n4_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.255324e-01 4.044762e-01 7.165653e-01 7.343648e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 4.842908e-263
# Keep the accuracy entry (first element of `overall`) for the test-set comparison
ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n4_lr_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...)
ad_tda_pc_5.50.5_n4_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.6982201 0.8116497 0.9211884
## Neg Pred Value Precision Recall
## 0.4603328 0.9211884 0.6982201
## F1 Prevalence Detection Rate
## 0.7943545 0.7592138 0.5300983
## Detection Prevalence Balanced Accuracy
## 0.5754505 0.7549349
# Precision, recall and F1 sit at positions 5-7 of `byClass`
ad_tda_pc_5.50.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR (Node 4)
### Per-fold accuracy difference across the 3 CV folds (ad_lr_fit_re minus Node 4)
diff_tda_pca_5.50.5_lr_n4_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n4_lr_fit_re
diff_tda_pca_5.50.5_lr_n4_3_fold
## Accuracy
## 1 0.003020208
## 2 0.007148218
## 3 0.533134021
## Bayesian tests on the 3-fold differences (Node 4)
# Bayesian sign test; -0.01 / 0.01 are the lower / upper ROPE limits
bst_tda_pca_5.50.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Sign-test odds of "left" against "right" (probLeft / probRight)
bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same differences, same ROPE limits
bsr_tda_pca_5.50.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.5825333
##
## $winRight
## [1] 0.4174667
# Bayesian correlated t-test on the 3-fold differences
# (second argument 0.1 is presumably the fold correlation -- confirm against
#  the definition of correlatedBayesianTtest; last two are the ROPE limits)
bct_tda_pca_5.50.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n4_3_fold
## $left
## [1] 0.2231778
##
## $rope
## [1] 0.02107443
##
## $right
## [1] 0.7557478
# ROPE plot of the 3-fold differences over [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_lr_n4_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
#bf_tda_pca_5.50.5_lr.n4_3_fold
# Frequentist one-sample t-test of the fold differences against 0
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold)
## t = 1.0289, df = 2, p-value = 0.4117
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.5762549 0.9384565
## sample estimates:
## mean of x
## 0.1811008
### Test-set accuracy difference (lr_cf_ov_acc minus Node 4 TDA-assisted accuracy)
diff_tda_pca_5.50.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n4_test
## Accuracy
## 0.1260238
## Bayesian tests on the test-set difference (Node 4)
# Bayesian sign test; note the input here is a single accuracy difference
bst_tda_pca_5.50.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds of "left" against "right" (probLeft / probRight)
bst_tda_pca_5.50.5_lr.n4_test_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n4_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, same ROPE limits
bsr_tda_pca_5.50.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1605
##
## $winRight
## [1] 0.8395
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single value, and the printed result is NA for
# left / rope / right -- this test is not informative for one observation.
bct_tda_pca_5.50.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n4_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n4_test))
#bf_tda_pca_5.50.5_lr.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n4_test))
##Node5
# Logistic regression (caret method "glm", binomial family) fitted to
# tda.m_adult_5.50.5.n5.vec with adult_df1 as the factor response and all other
# columns as predictors. Resampling comes from the shared fitControl object;
# the model is selected on accuracy.
Adult_TDA_PC_5.50.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Auto-print the fitted caret object (sample count, resampling scheme and
# cross-validated accuracy/kappa).
Adult_TDA_PC_5.50.5_n5_LrFit0
## Generalized Linear Model
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9602, 9604, 9602
## Resampling results:
##
## Accuracy Kappa
## 0.9967371 -0.001392465
# Per-fold resampling results of the node-5 fit
Adult_TDA_PC_5.50.5_n5_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.9972928 -0.0009620627 Fold1
## 2 0.9975000 -0.0009383797 Fold2
## 3 0.9954186 -0.0022769534 Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline LR, then show the coefficient table.
ad_tda_pc_5.50.5_n5_lr_fit_re <- Adult_TDA_PC_5.50.5_n5_LrFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.162e+15 4.232e+07 -51101694 <2e-16 ***
## V1 2.376e+12 6.085e+04 39044136 <2e-16 ***
## V2.. -2.941e+15 2.417e+07 -121660805 <2e-16 ***
## V2.Federal.gov -1.699e+14 2.417e+07 -7030029 <2e-16 ***
## V2.Local.gov -2.387e+15 2.400e+07 -99452186 <2e-16 ***
## V2.Never.worked -5.359e+14 3.500e+07 -15312477 <2e-16 ***
## V2.Private -1.711e+15 2.386e+07 -71711904 <2e-16 ***
## V2.Self.emp.inc -1.060e+15 2.524e+07 -42001880 <2e-16 ***
## V2.Self.emp.not.inc -2.324e+15 2.413e+07 -96314772 <2e-16 ***
## V2.State.gov -2.249e+15 2.405e+07 -93530750 <2e-16 ***
## V2.Without.pay NA NA NA NA
## V3 1.936e+08 5.347e+00 36197956 <2e-16 ***
## V4.10th 8.384e+14 3.033e+06 276394991 <2e-16 ***
## V4.11th -2.572e+14 2.616e+06 -98302261 <2e-16 ***
## V4.12th 1.142e+15 3.992e+06 285991075 <2e-16 ***
## V4.1st.4th 4.872e+14 7.471e+06 65209586 <2e-16 ***
## V4.5th.6th 4.300e+14 5.646e+06 76171359 <2e-16 ***
## V4.7th.8th -2.477e+14 4.295e+06 -57658375 <2e-16 ***
## V4.9th 8.353e+14 4.195e+06 199139001 <2e-16 ***
## V4.Assoc.acdm 1.640e+15 3.285e+06 499144179 <2e-16 ***
## V4.Assoc.voc 1.068e+13 3.097e+06 3446569 <2e-16 ***
## V4.Bachelors 1.544e+15 2.227e+06 693399248 <2e-16 ***
## V4.Doctorate 1.841e+15 2.545e+07 72331489 <2e-16 ***
## V4.HS.grad -2.490e+14 1.468e+06 -169661737 <2e-16 ***
## V4.Masters -6.746e+14 4.802e+06 -140470411 <2e-16 ***
## V4.Preschool 5.415e+14 1.147e+07 47195269 <2e-16 ***
## V4.Prof.school 1.703e+15 1.519e+07 112129727 <2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 4.259e+14 2.874e+06 148178073 <2e-16 ***
## V6.Married.AF.spouse -1.543e+14 2.463e+07 -6265107 <2e-16 ***
## V6.Married.civ.spouse 1.217e+15 7.595e+06 160187500 <2e-16 ***
## V6.Married.spouse.absent 1.506e+13 4.792e+06 3142987 <2e-16 ***
## V6.Never.married 4.063e+14 3.141e+06 129339448 <2e-16 ***
## V6.Separated 3.580e+14 3.572e+06 100204898 <2e-16 ***
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -1.017e+15 3.743e+06 -271623876 <2e-16 ***
## V7.Armed.Forces -1.176e+15 3.396e+07 -34625127 <2e-16 ***
## V7.Craft.repair 6.660e+14 4.032e+06 165173011 <2e-16 ***
## V7.Exec.managerial -2.748e+14 4.233e+06 -64926351 <2e-16 ***
## V7.Farming.fishing 7.219e+14 5.332e+06 135380403 <2e-16 ***
## V7.Handlers.cleaners 7.555e+14 4.069e+06 185659303 <2e-16 ***
## V7.Machine.op.inspct 5.763e+13 4.033e+06 14288309 <2e-16 ***
## V7.Other.service 3.967e+13 3.697e+06 10731345 <2e-16 ***
## V7.Priv.house.serv -7.565e+14 6.687e+06 -113133292 <2e-16 ***
## V7.Prof.specialty -2.301e+14 4.284e+06 -53698560 <2e-16 ***
## V7.Protective.serv 5.842e+13 6.468e+06 9032610 <2e-16 ***
## V7.Sales 1.079e+14 3.869e+06 27884603 <2e-16 ***
## V7.Tech.support -1.244e+15 4.879e+06 -255023701 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband NA NA NA NA
## V8.Not.in.family 1.050e+15 7.815e+06 134312655 <2e-16 ***
## V8.Other.relative 6.155e+14 7.766e+06 79260148 <2e-16 ***
## V8.Own.child 6.824e+14 7.775e+06 87770147 <2e-16 ***
## V8.Unmarried 1.163e+15 7.891e+06 147418645 <2e-16 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 7.955e+14 5.009e+06 158791158 <2e-16 ***
## V9.Asian.Pac.Islander 5.606e+14 4.575e+06 122534488 <2e-16 ***
## V9.Black 5.857e+13 1.646e+06 35588559 <2e-16 ***
## V9.Other -1.631e+14 5.283e+06 -30878904 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female 1.778e+15 1.416e+06 1256171611 <2e-16 ***
## V10.Male NA NA NA NA
## V11 2.519e+10 6.411e+02 39287154 <2e-16 ***
## V12 6.275e+11 2.151e+03 291765144 <2e-16 ***
## V13 -4.315e+11 5.322e+04 -8106976 <2e-16 ***
## V14.. -1.353e+15 3.389e+07 -39929374 <2e-16 ***
## V14.Cambodia -1.630e+15 4.235e+07 -38482872 <2e-16 ***
## V14.Canada -1.194e+15 3.516e+07 -33962427 <2e-16 ***
## V14.China -3.368e+15 3.680e+07 -91523027 <2e-16 ***
## V14.Columbia -9.336e+14 3.529e+07 -26452174 <2e-16 ***
## V14.Cuba -1.658e+15 3.517e+07 -47133098 <2e-16 ***
## V14.Dominican.Republic -7.311e+14 3.493e+07 -20929588 <2e-16 ***
## V14.Ecuador -8.851e+14 3.764e+07 -23516833 <2e-16 ***
## V14.El.Salvador -1.400e+15 3.455e+07 -40520006 <2e-16 ***
## V14.England -9.049e+14 3.546e+07 -25518749 <2e-16 ***
## V14.France -1.156e+15 4.041e+07 -28616005 <2e-16 ***
## V14.Germany -1.720e+15 3.467e+07 -49613234 <2e-16 ***
## V14.Greece -1.254e+15 4.509e+07 -27813651 <2e-16 ***
## V14.Guatemala -1.687e+15 3.503e+07 -48141120 <2e-16 ***
## V14.Haiti -1.119e+15 3.557e+07 -31471154 <2e-16 ***
## V14.Holand.Netherlands -4.936e+15 7.528e+07 -65570303 <2e-16 ***
## V14.Honduras -1.554e+15 3.884e+07 -40018740 <2e-16 ***
## V14.Hong -1.433e+15 4.054e+07 -35339499 <2e-16 ***
## V14.Hungary -1.455e+14 4.509e+07 -3227333 <2e-16 ***
## V14.India -1.478e+15 3.687e+07 -40085617 <2e-16 ***
## V14.Iran -1.369e+14 4.335e+07 -3157781 <2e-16 ***
## V14.Ireland -2.012e+15 3.844e+07 -52329979 <2e-16 ***
## V14.Italy -1.160e+15 3.683e+07 -31487069 <2e-16 ***
## V14.Jamaica -8.793e+14 3.479e+07 -25275953 <2e-16 ***
## V14.Japan -1.230e+15 3.659e+07 -33630842 <2e-16 ***
## V14.Laos -1.349e+15 4.002e+07 -33707314 <2e-16 ***
## V14.Mexico -1.540e+15 3.383e+07 -45503793 <2e-16 ***
## V14.Nicaragua -4.598e+14 3.682e+07 -12488819 <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -1.919e+15 3.923e+07 -48921853 <2e-16 ***
## V14.Peru -3.046e+15 3.667e+07 -83061823 <2e-16 ***
## V14.Philippines -3.528e+15 3.460e+07 -101957990 <2e-16 ***
## V14.Poland -3.354e+15 3.630e+07 -92405163 <2e-16 ***
## V14.Portugal -1.466e+15 3.760e+07 -38980594 <2e-16 ***
## V14.Puerto.Rico -7.184e+14 3.454e+07 -20800085 <2e-16 ***
## V14.Scotland -3.348e+14 4.508e+07 -7426195 <2e-16 ***
## V14.South -1.565e+15 3.570e+07 -43840023 <2e-16 ***
## V14.Taiwan -3.826e+15 3.745e+07 -102162557 <2e-16 ***
## V14.Thailand -2.314e+15 3.946e+07 -58644892 <2e-16 ***
## V14.Trinadad.Tobago -1.090e+15 3.926e+07 -27767055 <2e-16 ***
## V14.United.States -2.224e+15 3.360e+07 -66189140 <2e-16 ***
## V14.Vietnam -1.710e+15 3.544e+07 -48267228 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 418.0 on 14403 degrees of freedom
## Residual deviance: 5550.7 on 14306 degrees of freedom
## AIC: 5746.7
##
## Number of Fisher Scoring iterations: 25
# Predict classes for the rows of adult.one_hot_df4Test with the node-5 model.
# pred0 is a scratch name that later sections overwrite with other models'
# predictions.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions in pred0 against the adult_df1 labels of
# the test data frame, followed by a print of the result.
ad_tda_pc_5.50.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7389 2336
## >50K 27 16
##
## Accuracy : 0.7581
## 95% CI : (0.7495, 0.7666)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.608
##
## Kappa : 0.0048
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.996359
## Specificity : 0.006803
## Pos Pred Value : 0.759794
## Neg Pred Value : 0.372093
## Prevalence : 0.759214
## Detection Rate : 0.756450
## Detection Prevalence : 0.995598
## Balanced Accuracy : 0.501581
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; nothing is reassigned
# between this and the previous print, so the output is a repeat.
ad_tda_pc_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7389 2336
## >50K 27 16
##
## Accuracy : 0.7581
## 95% CI : (0.7495, 0.7666)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.608
##
## Kappa : 0.0048
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.996359
## Specificity : 0.006803
## Pos Pred Value : 0.759794
## Neg Pred Value : 0.372093
## Prevalence : 0.759214
## Detection Rate : 0.756450
## Detection Prevalence : 0.995598
## Balanced Accuracy : 0.501581
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-5 confusion matrix
ad_tda_pc_5.50.5_n5_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.758087633 0.004756105 0.749467767 0.766553357 0.759213759
## AccuracyPValue McnemarPValue
## 0.607970499 0.000000000
# Store the overall accuracy (the first element, selected here by name) for the
# test-set comparison, then list the per-class statistics.
ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n5_lr_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.50.5_n5_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.996359223 0.006802721 0.759794344
## Neg Pred Value Precision Recall
## 0.372093023 0.759794344 0.996359223
## F1 Prevalence Detection Rate
## 0.862143399 0.759213759 0.756449631
## Detection Prevalence Balanced Accuracy
## 0.995597871 0.501580972
# Precision, Recall and F1 (elements 5 to 7 of byClass), selected by name
ad_tda_pc_5.50.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_lr_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy of the non-TDA LR minus that of the node-5 TDA/PCA LR
diff_tda_pca_5.50.5_lr_n5_3_fold <- ad_lr_fit_re - ad_tda_pc_5.50.5_n5_lr_fit_re
diff_tda_pca_5.50.5_lr_n5_3_fold
## Accuracy
## 1 -0.1468058
## 2 -0.1412410
## 3 -0.1509858
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the three fold-wise differences, ROPE [-0.01, 0.01]
bst_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight. A zero probRight is not guarded, so the ratio
# can come out as Inf or NaN instead of raising an error.
bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n5_3_fold[["probRight"]]
bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the fold-wise differences, ROPE [-0.01, 0.01]
bsr_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.9906333
##
## $winRope
## [1] 0.009366667
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold-wise differences, ROPE [-0.01, 0.01].
# The 0.1 is presumably the fold-correlation parameter; confirm against the
# definition of correlatedBayesianTtest.
bct_tda_pca_5.50.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n5_3_fold
## $left
## [1] 0.9997145
##
## $rope
## [1] 6.831844e-05
##
## $right
## [1] 0.000217144
# ROPE plot of the 3-fold differences over the same [-0.01, 0.01] region
plot(
  rope(diff_tda_pca_5.50.5_lr_n5_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
#bf_tda_pca_5.50.5_lr.n5_3_fold
# One-sample t-test on the same differences. The argument expression is kept
# verbatim because it is echoed in the "data:" line of the printed result.
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold)
## t = -51.848, df = 2, p-value = 0.0003718
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1584887 -0.1341997
## sample estimates:
## mean of x
## -0.1463442
### Test set diff
# Difference in overall test accuracy: lr_cf_ov_acc minus the node-5 TDA/PCA
# LR accuracy. A positive value means lr_cf_ov_acc is the larger of the two.
diff_tda_pca_5.50.5_lr.n5_test <- lr_cf_ov_acc - ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n5_test
## Accuracy
## 0.09346847
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference, ROPE [-0.01, 0.01]
bst_tda_pca_5.50.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight. A zero probRight is not guarded, so the ratio
# can come out as Inf or NaN instead of raising an error.
bst_tda_pca_5.50.5_lr.n5_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n5_test[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n5_test[["probRight"]]
bst_tda_pca_5.50.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the test-set difference, ROPE [-0.01, 0.01]
bsr_tda_pca_5.50.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1603
##
## $winRight
## [1] 0.8397
# Bayesian Correlated Test on the test-set difference.
# The input holds a single value, and the recorded result is NA for left,
# rope and right.
bct_tda_pca_5.50.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n5_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n5_test))
#bf_tda_pca_5.50.5_lr.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Logistic regression (caret method "glm", binomial family) fitted to
# tda.m_kde_adult_5.50.5.n1.vec with adult_df1 as the factor response and all
# other columns as predictors. Resampling comes from the shared fitControl
# object; the model is selected on accuracy.
Adult_TDA_KDE_5.50.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Auto-print the fitted caret object (sample count, resampling scheme and
# cross-validated accuracy/kappa).
Adult_TDA_KDE_5.50.5_n1_LrFit0
## Generalized Linear Model
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8926, 8924, 8924
## Resampling results:
##
## Accuracy Kappa
## 0.859117 0.6173693
# Per-fold resampling results of the KDE node-1 fit
Adult_TDA_KDE_5.50.5_n1_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8590002 0.6140018 Fold1
## 2 0.8559265 0.6103198 Fold2
## 3 0.8624244 0.6277862 Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline LR, then show the coefficient table.
ad_tda_kde_5.50.5_n1_lr_fit_re <- Adult_TDA_KDE_5.50.5_n1_LrFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_KDE_5.50.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.158e+13 1.510e+13 -1.429 0.153005
## V1 1.441e-02 2.451e-03 5.878 4.15e-09 ***
## V2.. 2.158e+13 1.510e+13 1.429 0.153005
## V2.Federal.gov 2.158e+13 1.510e+13 1.429 0.153005
## V2.Local.gov 2.158e+13 1.510e+13 1.429 0.153005
## V2.Never.worked 2.158e+13 1.510e+13 1.429 0.153005
## V2.Private 2.158e+13 1.510e+13 1.429 0.153005
## V2.Self.emp.inc 2.158e+13 1.510e+13 1.429 0.153005
## V2.Self.emp.not.inc 2.158e+13 1.510e+13 1.429 0.153005
## V2.State.gov 2.158e+13 1.510e+13 1.429 0.153005
## V2.Without.pay 2.158e+13 1.510e+13 1.429 0.153005
## V3 6.893e-07 2.127e-07 3.241 0.001190 **
## V4.10th -1.169e+00 1.707e-01 -6.848 7.49e-12 ***
## V4.11th -1.106e+00 1.708e-01 -6.476 9.41e-11 ***
## V4.12th -8.562e-01 3.188e-01 -2.686 0.007236 **
## V4.1st.4th -1.741e+00 4.843e-01 -3.595 0.000325 ***
## V4.5th.6th -1.482e+00 3.069e-01 -4.828 1.38e-06 ***
## V4.7th.8th -1.618e+00 1.957e-01 -8.268 < 2e-16 ***
## V4.9th -1.325e+00 2.285e-01 -5.797 6.76e-09 ***
## V4.Assoc.acdm 1.387e-01 1.601e-01 0.866 0.386287
## V4.Assoc.voc 1.032e-01 1.585e-01 0.651 0.515109
## V4.Bachelors 5.937e-01 9.984e-02 5.947 2.74e-09 ***
## V4.Doctorate 1.835e+00 1.768e-01 10.379 < 2e-16 ***
## V4.HS.grad -4.623e-01 9.987e-02 -4.629 3.67e-06 ***
## V4.Masters 1.019e+00 1.205e-01 8.462 < 2e-16 ***
## V4.Preschool -3.217e+01 6.329e+04 -0.001 0.999594
## V4.Prof.school 1.589e+00 1.577e-01 10.082 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.860e-01 1.855e-01 -1.542 0.122995
## V6.Married.AF.spouse 2.264e+00 8.805e-01 2.571 0.010128 *
## V6.Married.civ.spouse 1.810e+00 4.553e-01 3.976 7.01e-05 ***
## V6.Married.spouse.absent -5.586e-01 3.806e-01 -1.468 0.142147
## V6.Never.married -8.128e-01 1.962e-01 -4.143 3.43e-05 ***
## V6.Separated -3.280e-01 2.735e-01 -1.199 0.230452
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -2.085e-01 1.650e-01 -1.263 0.206432
## V7.Armed.Forces -1.677e-01 2.205e+00 -0.076 0.939391
## V7.Craft.repair 7.394e-02 1.413e-01 0.523 0.600816
## V7.Exec.managerial 7.304e-01 1.424e-01 5.130 2.90e-07 ***
## V7.Farming.fishing -1.036e+00 2.129e-01 -4.866 1.14e-06 ***
## V7.Handlers.cleaners -8.105e-01 2.508e-01 -3.232 0.001231 **
## V7.Machine.op.inspct -6.159e-01 1.917e-01 -3.213 0.001312 **
## V7.Other.service -1.049e+00 2.076e-01 -5.053 4.35e-07 ***
## V7.Priv.house.serv -2.332e+01 2.443e+04 -0.001 0.999238
## V7.Prof.specialty 3.653e-01 1.491e-01 2.450 0.014300 *
## V7.Protective.serv 1.828e-01 2.277e-01 0.803 0.422010
## V7.Sales 1.044e-01 1.498e-01 0.697 0.485734
## V7.Tech.support 5.809e-01 2.033e-01 2.857 0.004278 **
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.270e+00 1.565e-01 -8.118 4.75e-16 ***
## V8.Not.in.family -6.745e-01 4.429e-01 -1.523 0.127787
## V8.Other.relative -1.488e+00 4.241e-01 -3.509 0.000450 ***
## V8.Own.child -1.820e+00 4.592e-01 -3.964 7.36e-05 ***
## V8.Unmarried -6.297e-01 4.557e-01 -1.382 0.166968
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.743e-01 3.185e-01 -0.547 0.584288
## V9.Asian.Pac.Islander 1.859e-01 2.667e-01 0.697 0.485786
## V9.Black -9.233e-02 1.164e-01 -0.793 0.427751
## V9.Other 4.619e-01 3.857e-01 1.198 0.231052
## V9.White NA NA NA NA
## V10.Female -8.920e-01 1.133e-01 -7.872 3.49e-15 ***
## V10.Male NA NA NA NA
## V11 3.005e-04 1.528e-05 19.668 < 2e-16 ***
## V12 6.511e-04 5.583e-05 11.662 < 2e-16 ***
## V13 3.122e-02 2.322e-03 13.445 < 2e-16 ***
## V14.. -1.280e+00 1.212e+00 -1.056 0.290857
## V14.Cambodia 3.755e-01 1.680e+00 0.223 0.823174
## V14.Canada -9.345e-01 1.246e+00 -0.750 0.453395
## V14.China -2.114e+00 1.306e+00 -1.619 0.105527
## V14.Columbia -2.620e+00 1.497e+00 -1.749 0.080212 .
## V14.Cuba -1.492e+00 1.276e+00 -1.169 0.242393
## V14.Dominican.Republic -2.395e+01 4.126e+04 -0.001 0.999537
## V14.Ecuador -2.571e+00 1.971e+00 -1.305 0.191940
## V14.El.Salvador -1.926e+00 1.451e+00 -1.327 0.184408
## V14.England -1.037e+00 1.265e+00 -0.819 0.412585
## V14.France -9.263e-01 1.358e+00 -0.682 0.495292
## V14.Germany -6.693e-01 1.267e+00 -0.528 0.597199
## V14.Greece -1.402e+00 1.463e+00 -0.958 0.338079
## V14.Guatemala -1.357e+00 1.556e+00 -0.872 0.383148
## V14.Haiti -2.065e+00 1.898e+00 -1.088 0.276500
## V14.Holand.Netherlands -2.328e+01 3.075e+05 0.000 0.999940
## V14.Honduras -1.547e+00 3.232e+00 -0.479 0.632153
## V14.Hong -6.725e-01 1.451e+00 -0.463 0.643089
## V14.Hungary -5.827e-01 1.639e+00 -0.355 0.722223
## V14.India -1.689e+00 1.283e+00 -1.317 0.187963
## V14.Iran -1.879e+00 1.447e+00 -1.298 0.194142
## V14.Ireland -5.711e-01 1.894e+00 -0.302 0.762982
## V14.Italy 5.082e-01 1.255e+00 0.405 0.685609
## V14.Jamaica -2.496e+00 1.621e+00 -1.540 0.123581
## V14.Japan -1.754e-01 1.370e+00 -0.128 0.898101
## V14.Laos -7.329e-01 1.570e+00 -0.467 0.640724
## V14.Mexico -1.573e+00 1.219e+00 -1.291 0.196677
## V14.Nicaragua -1.716e+00 1.449e+00 -1.184 0.236327
## V14.Outlying.US.Guam.USVI.etc. -2.564e+01 1.160e+05 0.000 0.999824
## V14.Peru -1.265e+00 1.641e+00 -0.771 0.440959
## V14.Philippines -1.387e+00 1.263e+00 -1.098 0.272027
## V14.Poland -7.809e-01 1.367e+00 -0.571 0.567687
## V14.Portugal -2.234e+00 1.721e+00 -1.298 0.194220
## V14.Puerto.Rico -4.165e-01 1.305e+00 -0.319 0.749604
## V14.Scotland 2.571e-01 1.775e+00 0.145 0.884829
## V14.South -2.715e+00 1.388e+00 -1.956 0.050460 .
## V14.Taiwan -5.682e-01 1.398e+00 -0.406 0.684404
## V14.Thailand -1.812e+00 1.906e+00 -0.951 0.341798
## V14.Trinadad.Tobago -1.127e+00 1.723e+00 -0.654 0.512878
## V14.United.States -9.105e-01 1.193e+00 -0.763 0.445470
## V14.Vietnam -1.315e+00 1.463e+00 -0.899 0.368657
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 15320.8 on 13386 degrees of freedom
## Residual deviance: 8281.5 on 13287 degrees of freedom
## AIC: 8481.5
##
## Number of Fisher Scoring iterations: 25
# Predict classes for the rows of adult.one_hot_df4Test with the KDE node-1
# model. pred0 is a scratch name shared with the other model sections, so this
# overwrites the previous predictions.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions in pred0 against the adult_df1 labels of
# the test data frame, followed by a print of the result.
ad_tda_kde_5.50.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6969 1004
## >50K 447 1348
##
## Accuracy : 0.8515
## 95% CI : (0.8442, 0.8585)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.558
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9397
## Specificity : 0.5731
## Pos Pred Value : 0.8741
## Neg Pred Value : 0.7510
## Prevalence : 0.7592
## Detection Rate : 0.7135
## Detection Prevalence : 0.8162
## Balanced Accuracy : 0.7564
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; nothing is reassigned
# between this and the previous print, so the output is a repeat.
ad_tda_kde_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6969 1004
## >50K 447 1348
##
## Accuracy : 0.8515
## 95% CI : (0.8442, 0.8585)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.558
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9397
## Specificity : 0.5731
## Pos Pred Value : 0.8741
## Neg Pred Value : 0.7510
## Prevalence : 0.7592
## Detection Rate : 0.7135
## Detection Prevalence : 0.8162
## Balanced Accuracy : 0.7564
##
## 'Positive' Class : <=50K
##
# Overall statistics of the KDE node-1 confusion matrix
ad_tda_kde_5.50.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.514537e-01 5.579693e-01 8.442451e-01 8.584521e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.129564e-112 2.967542e-48
# Store the overall accuracy (the first element, selected here by name) for the
# test-set comparison, then list the per-class statistics.
ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n1_lr_cf0[["overall"]]["Accuracy"]
ad_tda_kde_5.50.5_n1_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9397249 0.5731293 0.8740750
## Neg Pred Value Precision Recall
## 0.7509749 0.8740750 0.9397249
## F1 Prevalence Detection Rate
## 0.9057119 0.7592138 0.7134521
## Detection Prevalence Balanced Accuracy
## 0.8162367 0.7564271
# Precision, Recall and F1 (elements 5 to 7 of byClass), selected by name
ad_tda_kde_5.50.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_lr_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy of the non-TDA LR minus that of the KDE node-1 LR
diff_tda_kde_5.50.5_lr_n1_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n1_lr_fit_re
diff_tda_kde_5.50.5_lr_n1_3_fold
## Accuracy
## 1 -0.0085132539
## 2 0.0003325428
## 3 -0.0179916328
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the three fold-wise differences, ROPE [-0.01, 0.01]
bst_tda_kde_5.50.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n1_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight. A zero probRight is not guarded, so the ratio
# can come out as Inf or NaN instead of raising an error.
bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left <-
  bst_tda_kde_5.50.5_lr.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n1_3_fold[["probRight"]]
bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the fold-wise differences, ROPE [-0.01, 0.01]
bsr_tda_kde_5.50.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n1_3_fold
## $winLeft
## [1] 0.1609
##
## $winRope
## [1] 0.8391
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold-wise differences, ROPE [-0.01, 0.01].
# The 0.1 is presumably the fold-correlation parameter; confirm against the
# definition of correlatedBayesianTtest.
bct_tda_kde_5.50.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n1_3_fold
## $left
## [1] 0.4269546
##
## $rope
## [1] 0.5270443
##
## $right
## [1] 0.04600108
# ROPE plot of the 3-fold differences over the same [-0.01, 0.01] region
plot(
  rope(diff_tda_kde_5.50.5_lr_n1_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
#bf_tda_kde_5.50.5_lr.n1_3_fold
# One-sample t-test on the same differences. The argument expression is kept
# verbatim because it is echoed in the "data:" line of the printed result.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold)
## t = -1.6489, df = 2, p-value = 0.2409
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03148852 0.01404029
## sample estimates:
## mean of x
## -0.008724115
### Test set diff
# Difference in overall test accuracy between the non-TDA LR (lr_cf_ov_acc) and
# the KDE node-1 LR. This is an LR-vs-LR comparison, like the other LR test-set
# diffs in this file, so the minuend must be lr_cf_ov_acc. The previous operand,
# svm_cf_ov_acc, is presumably the SVM baseline and looks like a copy-paste
# slip.
# NOTE(review): the value printed below (-0.0004095004) was recorded with the
# svm_cf_ov_acc operand and should be regenerated after this change.
diff_tda_kde_5.50.5_lr.n1_test <- lr_cf_ov_acc - ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n1_test
## Accuracy
## -0.0004095004
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference, ROPE [-0.01, 0.01]
bst_tda_kde_5.50.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight. A zero probRight is not guarded, so the ratio
# can come out as Inf or NaN instead of raising an error.
bst_tda_kde_5.50.5_lr.n1_test_odds.left <-
  bst_tda_kde_5.50.5_lr.n1_test[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n1_test[["probRight"]]
bst_tda_kde_5.50.5_lr.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test on the test-set difference, ROPE [-0.01, 0.01]
bsr_tda_kde_5.50.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the test-set difference.
# The input holds a single value, and the recorded result is NA for left,
# rope and right.
bct_tda_kde_5.50.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n1_test))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n1_test))
#bf_tda_kde_5.50.5_lr.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n1_test))
## With TDA KDE filter: 5 intervals, 50% overlap, 5 bins
## Node2
# Binomial GLM fitted through caret on the Node2 data, resampled with the
# shared fitControl settings and scored by Accuracy.
Adult_TDA_KDE_5.50.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the cross-validated fit for Node2
Adult_TDA_KDE_5.50.5_n2_LrFit0
## Generalized Linear Model
##
## 12638 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8426, 8425, 8425
## Resampling results:
##
## Accuracy Kappa
## 0.8417463 0.5941028
# Per-fold Accuracy and Kappa
Adult_TDA_KDE_5.50.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8311966 0.5697380 Fold1
## 2 0.8530738 0.6180946 Fold2
## 3 0.8409684 0.5944759 Fold3
# Keep the per-fold Accuracy column (first column, still a data frame)
# for the 3-fold comparison against the baseline.
ad_tda_kde_5.50.5_n2_lr_fit_re <- Adult_TDA_KDE_5.50.5_n2_LrFit0$resample["Accuracy"]
# Coefficient table of the fitted model
summary(Adult_TDA_KDE_5.50.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (15 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.377e+12 5.353e+12 0.444 0.656994
## V1 4.019e-02 2.998e-03 13.407 < 2e-16 ***
## V2.. -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Federal.gov -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Local.gov -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Never.worked -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Private -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Self.emp.inc -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Self.emp.not.inc -2.377e+12 5.353e+12 -0.444 0.656994
## V2.State.gov -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Without.pay -2.377e+12 5.353e+12 -0.444 0.656994
## V3 1.103e-06 3.002e-07 3.673 0.000240 ***
## V4.10th -1.293e+00 6.071e-01 -2.129 0.033222 *
## V4.11th -7.382e-01 2.070e-01 -3.566 0.000363 ***
## V4.12th -6.992e-01 3.145e-01 -2.223 0.026183 *
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 2.497e-01 1.403e-01 1.779 0.075165 .
## V4.Assoc.voc 1.348e-01 1.431e-01 0.942 0.346329
## V4.Bachelors 9.199e-01 9.162e-02 10.040 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -4.506e-01 8.894e-02 -5.067 4.05e-07 ***
## V4.Masters 1.352e+00 1.141e-01 11.857 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school 2.484e+00 2.158e-01 11.509 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -4.044e-02 2.329e-01 -0.174 0.862155
## V6.Married.AF.spouse 3.066e+00 9.421e-01 3.254 0.001137 **
## V6.Married.civ.spouse 2.088e+00 5.310e-01 3.932 8.43e-05 ***
## V6.Married.spouse.absent 2.967e-02 3.779e-01 0.079 0.937413
## V6.Never.married -5.803e-01 2.445e-01 -2.374 0.017602 *
## V6.Separated -2.802e-01 3.178e-01 -0.882 0.377909
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.207e-02 1.627e-01 0.136 0.892080
## V7.Armed.Forces -2.415e+01 3.607e+05 0.000 0.999947
## V7.Craft.repair 1.250e-01 1.443e-01 0.867 0.386187
## V7.Exec.managerial 8.952e-01 1.436e-01 6.236 4.49e-10 ***
## V7.Farming.fishing -6.954e-01 2.187e-01 -3.180 0.001473 **
## V7.Handlers.cleaners -4.394e-01 2.579e-01 -1.704 0.088433 .
## V7.Machine.op.inspct -4.405e-01 1.941e-01 -2.270 0.023203 *
## V7.Other.service -1.026e+00 2.122e-01 -4.836 1.32e-06 ***
## V7.Priv.house.serv -3.433e+00 2.526e+00 -1.359 0.174160
## V7.Prof.specialty 5.321e-01 1.509e-01 3.527 0.000420 ***
## V7.Protective.serv 7.123e-01 2.190e-01 3.253 0.001142 **
## V7.Sales 2.879e-01 1.495e-01 1.926 0.054125 .
## V7.Tech.support 5.911e-01 1.928e-01 3.066 0.002173 **
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.440e+00 1.490e-01 -9.669 < 2e-16 ***
## V8.Not.in.family -8.639e-01 4.964e-01 -1.740 0.081799 .
## V8.Other.relative -1.702e+00 4.234e-01 -4.021 5.81e-05 ***
## V8.Own.child -1.897e+00 5.021e-01 -3.777 0.000159 ***
## V8.Unmarried -9.347e-01 5.071e-01 -1.843 0.065260 .
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -5.421e-03 3.121e-01 -0.017 0.986139
## V9.Asian.Pac.Islander -1.005e-01 2.368e-01 -0.425 0.671122
## V9.Black 2.333e-03 1.215e-01 0.019 0.984688
## V9.Other 1.334e-01 4.064e-01 0.328 0.742657
## V9.White NA NA NA NA
## V10.Female -8.916e-01 1.120e-01 -7.960 1.72e-15 ***
## V10.Male NA NA NA NA
## V11 3.295e-04 1.625e-05 20.279 < 2e-16 ***
## V12 7.852e-04 6.169e-05 12.727 < 2e-16 ***
## V13 2.469e-02 2.664e-03 9.267 < 2e-16 ***
## V14.. -4.861e-01 9.129e-01 -0.532 0.594403
## V14.Cambodia 1.803e+00 1.339e+00 1.346 0.178193
## V14.Canada -5.672e-02 9.926e-01 -0.057 0.954429
## V14.China -6.112e-01 1.074e+00 -0.569 0.569260
## V14.Columbia -2.500e+01 6.303e+04 0.000 0.999684
## V14.Cuba 4.934e-02 1.016e+00 0.049 0.961261
## V14.Dominican.Republic -2.431e+01 6.589e+04 0.000 0.999706
## V14.Ecuador -1.085e+00 1.642e+00 -0.661 0.508921
## V14.El.Salvador -2.494e-01 1.173e+00 -0.213 0.831555
## V14.England 5.221e-01 9.807e-01 0.532 0.594445
## V14.France 6.892e-01 1.203e+00 0.573 0.566757
## V14.Germany 7.369e-01 9.650e-01 0.764 0.445097
## V14.Greece -1.341e+00 1.203e+00 -1.115 0.264954
## V14.Guatemala -7.413e-01 2.216e+00 -0.334 0.738034
## V14.Haiti -5.211e-01 1.222e+00 -0.427 0.669732
## V14.Holand.Netherlands -2.230e+01 3.364e+05 0.000 0.999947
## V14.Honduras -2.309e+01 1.684e+05 0.000 0.999891
## V14.Hong 1.743e+00 1.363e+00 1.278 0.201085
## V14.Hungary 9.291e-01 1.599e+00 0.581 0.561302
## V14.India -3.241e-01 9.738e-01 -0.333 0.739287
## V14.Iran -9.642e-02 1.249e+00 -0.077 0.938485
## V14.Ireland -2.406e+01 1.255e+05 0.000 0.999847
## V14.Italy 9.493e-01 1.030e+00 0.921 0.356945
## V14.Jamaica -1.946e+00 1.368e+00 -1.423 0.154862
## V14.Japan 4.816e-01 1.096e+00 0.440 0.660258
## V14.Laos -2.472e+01 1.183e+05 0.000 0.999833
## V14.Mexico -6.107e-01 9.836e-01 -0.621 0.534691
## V14.Nicaragua -2.426e+01 8.981e+04 0.000 0.999785
## V14.Outlying.US.Guam.USVI.etc. -2.428e+01 1.889e+05 0.000 0.999897
## V14.Peru -3.972e-01 1.436e+00 -0.277 0.782121
## V14.Philippines 6.763e-01 9.607e-01 0.704 0.481468
## V14.Poland 3.535e-01 1.087e+00 0.325 0.745094
## V14.Portugal -2.412e+01 1.266e+05 0.000 0.999848
## V14.Puerto.Rico -3.442e-01 1.140e+00 -0.302 0.762585
## V14.Scotland 8.816e-03 1.404e+00 0.006 0.994991
## V14.South -1.041e+00 1.069e+00 -0.974 0.330195
## V14.Taiwan 1.561e-01 1.085e+00 0.144 0.885539
## V14.Thailand -8.911e-01 1.498e+00 -0.595 0.551914
## V14.Trinadad.Tobago -3.699e-01 1.763e+00 -0.210 0.833792
## V14.United.States 1.391e-01 8.881e-01 0.157 0.875548
## V14.Vietnam -1.970e+00 1.455e+00 -1.354 0.175700
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 14967.3 on 12637 degrees of freedom
## Residual deviance: 8297.9 on 12544 degrees of freedom
## AIC: 8485.9
##
## Number of Fisher Scoring iterations: 25
# Score the held-out test data with the Node2 model
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the true test labels
ad_tda_kde_5.50.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6903 985
## >50K 513 1367
##
## Accuracy : 0.8466
## 95% CI : (0.8393, 0.8537)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5497
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9308
## Specificity : 0.5812
## Pos Pred Value : 0.8751
## Neg Pred Value : 0.7271
## Prevalence : 0.7592
## Detection Rate : 0.7067
## Detection Prevalence : 0.8075
## Balanced Accuracy : 0.7560
##
## 'Positive' Class : <=50K
##
# Second print of the same Node2 confusion matrix (output is identical to the first print)
ad_tda_kde_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6903 985
## >50K 513 1367
##
## Accuracy : 0.8466
## 95% CI : (0.8393, 0.8537)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5497
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9308
## Specificity : 0.5812
## Pos Pred Value : 0.8751
## Neg Pred Value : 0.7271
## Prevalence : 0.7592
## Detection Rate : 0.7067
## Detection Prevalence : 0.8075
## Balanced Accuracy : 0.7560
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node2 confusion matrix
ad_tda_kde_5.50.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.466421e-01 5.496963e-01 8.393419e-01 8.537349e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.790746e-100 4.530430e-34
# Keep the test-set Accuracy (first element of $overall) for the test-set comparison
ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.50.5_n2_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9308252 0.5812075 0.8751268
## Neg Pred Value Precision Recall
## 0.7271277 0.8751268 0.9308252
## F1 Prevalence Detection Rate
## 0.9021171 0.7592138 0.7066953
## Detection Prevalence Balanced Accuracy
## 0.8075348 0.7560164
# Keep Precision, Recall and F1 (elements 5 to 7 of $byClass)
ad_tda_kde_5.50.5_n2_lr_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Per-fold accuracy of the baseline LR fit minus that of the Node2 fit
diff_tda_kde_5.50.5_lr_n2_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n2_lr_fit_re
diff_tda_kde_5.50.5_lr_n2_3_fold
## Accuracy
## 1 0.019290389
## 2 0.003185230
## 3 0.003464314
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test (probLeft / probRight)
bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n2_3_fold, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.6732333
##
## $winRight
## [1] 0.3267667
# Bayesian Correlated Test
bct_tda_kde_5.50.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n2_3_fold
## $left
## [1] 0.04681356
##
## $rope
## [1] 0.5301139
##
## $right
## [1] 0.4230726
# ROPE plot of the Node2 3-fold differences over the range [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_lr_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold))
#bf_tda_kde_5.50.5_lr.n2_3_fold
# One-sample t-test on the same 3-fold differences
t.test(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold)
## t = 1.6246, df = 2, p-value = 0.2458
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01425415 0.03154744
## sample estimates:
## mean of x
## 0.008646645
### Test set diff
# Gap between the reference test accuracy and the Node2 model's test accuracy.
# NOTE(review): the reference is svm_cf_ov_acc while the 3-fold comparison uses
# the LR baseline (ad_lr_fit_re) - confirm the SVM accuracy is intended here.
diff_tda_kde_5.50.5_lr.n2_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n2_test
## Accuracy
## 0.004402129
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test (probLeft / probRight; 0 / 0 gives the NaN shown)
bst_tda_kde_5.50.5_lr.n2_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n2_test, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test (returns NA for this single-value input, as the output shows)
bct_tda_kde_5.50.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n2_test))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n2_test))
#bf_tda_kde_5.50.5_lr.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n2_test))
## Node3
# Binomial GLM fitted through caret on the Node3 data, resampled with the
# shared fitControl settings and scored by Accuracy.
Adult_TDA_KDE_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the cross-validated fit for Node3
Adult_TDA_KDE_5.50.5_n3_LrFit0
## Generalized Linear Model
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7756, 7756, 7756
## Resampling results:
##
## Accuracy Kappa
## 0.8318721 0.565766
# Per-fold Accuracy and Kappa of the Node3 fit
Adult_TDA_KDE_5.50.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8388345 0.5812742 Fold1
## 2 0.8274884 0.5520649 Fold2
## 3 0.8292935 0.5639590 Fold3
# Keep the per-fold Accuracy column of the Node3 fit for the 3-fold comparison.
# Fixed: this line used to read Adult_TDA_KDE_5.50.5_n2_LrFit0$resample, so the
# Node3 comparison silently reused the Node2 folds.
# NOTE(review): the Node3 3-fold results recorded further down were produced
# before this fix (they equal the Node2 ones); re-run to refresh them.
ad_tda_kde_5.50.5_n3_lr_fit_re <- Adult_TDA_KDE_5.50.5_n3_LrFit0$resample[1]
# Coefficient table of the fitted Node3 model
summary(Adult_TDA_KDE_5.50.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (18 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.210e+13 1.132e+13 -1.068 0.285486
## V1 5.716e-02 3.707e-03 15.419 < 2e-16 ***
## V2.. 1.210e+13 1.132e+13 1.068 0.285486
## V2.Federal.gov 1.210e+13 1.132e+13 1.068 0.285486
## V2.Local.gov 1.210e+13 1.132e+13 1.068 0.285486
## V2.Never.worked -4.492e+15 1.132e+13 -396.606 < 2e-16 ***
## V2.Private 1.210e+13 1.132e+13 1.068 0.285486
## V2.Self.emp.inc 1.210e+13 1.132e+13 1.068 0.285486
## V2.Self.emp.not.inc 1.210e+13 1.132e+13 1.068 0.285486
## V2.State.gov 1.210e+13 1.132e+13 1.068 0.285486
## V2.Without.pay 1.210e+13 1.132e+13 1.068 0.285486
## V3 1.058e-06 4.265e-07 2.480 0.013122 *
## V4.10th NA NA NA NA
## V4.11th -2.141e+01 1.391e+05 0.000 0.999877
## V4.12th -3.019e-01 3.262e-01 -0.926 0.354645
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 4.043e-01 1.394e-01 2.901 0.003723 **
## V4.Assoc.voc 3.090e-01 1.336e-01 2.313 0.020710 *
## V4.Bachelors 1.133e+00 9.265e-02 12.228 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -2.567e-01 8.674e-02 -2.959 0.003087 **
## V4.Masters 1.542e+00 1.396e-01 11.048 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.761e-02 3.077e-01 -0.090 0.928514
## V6.Married.AF.spouse 3.429e+00 1.082e+00 3.170 0.001525 **
## V6.Married.civ.spouse 2.257e+00 5.151e-01 4.382 1.18e-05 ***
## V6.Married.spouse.absent 1.001e-01 4.357e-01 0.230 0.818365
## V6.Never.married -3.595e-01 3.181e-01 -1.130 0.258384
## V6.Separated -2.111e-01 3.891e-01 -0.543 0.587345
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.406e-01 1.626e-01 1.480 0.139002
## V7.Armed.Forces -2.416e+01 1.729e+05 0.000 0.999888
## V7.Craft.repair 1.624e-01 1.439e-01 1.129 0.258979
## V7.Exec.managerial 1.016e+00 1.458e-01 6.970 3.17e-12 ***
## V7.Farming.fishing -8.835e-01 2.453e-01 -3.602 0.000316 ***
## V7.Handlers.cleaners -4.339e-01 2.492e-01 -1.741 0.081600 .
## V7.Machine.op.inspct -5.824e-02 1.761e-01 -0.331 0.740869
## V7.Other.service -6.660e-01 2.097e-01 -3.176 0.001495 **
## V7.Priv.house.serv -3.271e+00 2.440e+00 -1.341 0.180045
## V7.Prof.specialty 6.993e-01 1.540e-01 4.541 5.60e-06 ***
## V7.Protective.serv 1.032e+00 2.144e-01 4.812 1.49e-06 ***
## V7.Sales 6.145e-01 1.496e-01 4.106 4.02e-05 ***
## V7.Tech.support 7.322e-01 1.889e-01 3.876 0.000106 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.456e+00 1.601e-01 -9.091 < 2e-16 ***
## V8.Not.in.family -9.264e-01 4.390e-01 -2.110 0.034857 *
## V8.Other.relative -1.820e+00 3.930e-01 -4.629 3.67e-06 ***
## V8.Own.child -2.250e+00 4.251e-01 -5.293 1.20e-07 ***
## V8.Unmarried -1.128e+00 4.525e-01 -2.493 0.012654 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -4.874e-01 3.861e-01 -1.262 0.206910
## V9.Asian.Pac.Islander 4.647e-02 2.241e-01 0.207 0.835706
## V9.Black -1.506e-01 1.218e-01 -1.237 0.216218
## V9.Other -6.531e-01 4.634e-01 -1.409 0.158715
## V9.White NA NA NA NA
## V10.Female -8.174e-01 1.260e-01 -6.489 8.64e-11 ***
## V10.Male NA NA NA NA
## V11 3.215e-04 1.673e-05 19.219 < 2e-16 ***
## V12 6.916e-04 6.360e-05 10.875 < 2e-16 ***
## V13 2.419e-02 2.892e-03 8.366 < 2e-16 ***
## V14.. -4.034e-01 1.001e+00 -0.403 0.687010
## V14.Cambodia 1.621e+00 1.309e+00 1.239 0.215505
## V14.Canada 7.824e-01 1.077e+00 0.726 0.467623
## V14.China -9.027e-01 1.176e+00 -0.768 0.442732
## V14.Columbia -2.441e+01 5.758e+04 0.000 0.999662
## V14.Cuba 1.205e+00 1.123e+00 1.073 0.283390
## V14.Dominican.Republic -7.977e-01 1.486e+00 -0.537 0.591305
## V14.Ecuador -1.104e+00 1.607e+00 -0.687 0.492121
## V14.El.Salvador 8.271e-02 1.280e+00 0.065 0.948471
## V14.England 4.644e-01 1.095e+00 0.424 0.671504
## V14.France 1.409e+00 1.308e+00 1.078 0.281142
## V14.Germany 4.124e-01 1.048e+00 0.394 0.693942
## V14.Greece -1.840e+00 1.297e+00 -1.419 0.155846
## V14.Guatemala -2.407e+01 9.554e+04 0.000 0.999799
## V14.Haiti -6.195e-02 1.258e+00 -0.049 0.960732
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.330e+01 1.741e+05 0.000 0.999893
## V14.Hong -2.201e+01 2.388e+05 0.000 0.999926
## V14.Hungary -7.904e-01 1.531e+00 -0.516 0.605591
## V14.India -4.675e-01 1.080e+00 -0.433 0.665103
## V14.Iran 1.600e-02 1.127e+00 0.014 0.988670
## V14.Ireland 1.424e+00 1.373e+00 1.037 0.299660
## V14.Italy -6.461e-02 1.132e+00 -0.057 0.954474
## V14.Jamaica 1.388e-02 1.222e+00 0.011 0.990940
## V14.Japan 4.253e-01 1.167e+00 0.364 0.715511
## V14.Laos -2.419e+01 1.483e+05 0.000 0.999870
## V14.Mexico -7.942e-01 1.153e+00 -0.689 0.490896
## V14.Nicaragua -2.163e+01 1.614e+05 0.000 0.999893
## V14.Outlying.US.Guam.USVI.etc. -2.304e+01 1.849e+05 0.000 0.999901
## V14.Peru -1.213e+00 1.633e+00 -0.743 0.457765
## V14.Philippines 1.096e+00 1.052e+00 1.042 0.297548
## V14.Poland -1.086e-01 1.139e+00 -0.095 0.923996
## V14.Portugal 5.104e-01 1.356e+00 0.376 0.706689
## V14.Puerto.Rico -1.235e+00 1.178e+00 -1.048 0.294454
## V14.Scotland -5.170e-01 1.647e+00 -0.314 0.753623
## V14.South -1.024e+00 1.172e+00 -0.873 0.382456
## V14.Taiwan -4.563e-01 1.221e+00 -0.374 0.708564
## V14.Thailand -8.318e-01 1.654e+00 -0.503 0.614977
## V14.Trinadad.Tobago -4.545e-01 1.754e+00 -0.259 0.795488
## V14.United.States 1.502e-01 9.798e-01 0.153 0.878157
## V14.Vietnam -2.492e+00 1.491e+00 -1.671 0.094637 .
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13708.0 on 11633 degrees of freedom
## Residual deviance: 7850.4 on 11543 degrees of freedom
## AIC: 8032.4
##
## Number of Fisher Scoring iterations: 25
# Score the held-out test data with the Node3 model
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the true test labels
ad_tda_kde_5.50.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6768 915
## >50K 648 1437
##
## Accuracy : 0.84
## 95% CI : (0.8326, 0.8472)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5447
##
## Mcnemar's Test P-Value : 1.717e-11
##
## Sensitivity : 0.9126
## Specificity : 0.6110
## Pos Pred Value : 0.8809
## Neg Pred Value : 0.6892
## Prevalence : 0.7592
## Detection Rate : 0.6929
## Detection Prevalence : 0.7865
## Balanced Accuracy : 0.7618
##
## 'Positive' Class : <=50K
##
# Second print of the same Node3 confusion matrix (output is identical to the first print)
ad_tda_kde_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6768 915
## >50K 648 1437
##
## Accuracy : 0.84
## 95% CI : (0.8326, 0.8472)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5447
##
## Mcnemar's Test P-Value : 1.717e-11
##
## Sensitivity : 0.9126
## Specificity : 0.6110
## Pos Pred Value : 0.8809
## Neg Pred Value : 0.6892
## Prevalence : 0.7592
## Detection Rate : 0.6929
## Detection Prevalence : 0.7865
## Balanced Accuracy : 0.7618
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node3 confusion matrix
ad_tda_kde_5.50.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.399877e-01 5.447027e-01 8.325650e-01 8.472071e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 3.681244e-85 1.717152e-11
# Keep the test-set Accuracy (first element of $overall) for the test-set comparison
ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n3_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.50.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9126214 0.6109694 0.8809059
## Neg Pred Value Precision Recall
## 0.6892086 0.8809059 0.9126214
## F1 Prevalence Detection Rate
## 0.8964832 0.7592138 0.6928747
## Detection Prevalence Balanced Accuracy
## 0.7865479 0.7617954
# Keep Precision, Recall and F1 (elements 5 to 7 of $byClass)
ad_tda_kde_5.50.5_n3_lr_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Per-fold accuracy of the baseline LR fit minus the values stored in
# ad_tda_kde_5.50.5_n3_lr_fit_re
diff_tda_kde_5.50.5_lr_n3_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n3_lr_fit_re
diff_tda_kde_5.50.5_lr_n3_3_fold
## Accuracy
## 1 0.019290389
## 2 0.003185230
## 3 0.003464314
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test (probLeft / probRight)
bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n3_3_fold, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.6711
##
## $winRight
## [1] 0.3289
# Bayesian Correlated Test
bct_tda_kde_5.50.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n3_3_fold
## $left
## [1] 0.04681356
##
## $rope
## [1] 0.5301139
##
## $right
## [1] 0.4230726
# ROPE plot of the Node3 3-fold differences over the range [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_lr_n3_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold))
#bf_tda_kde_5.50.5_lr.n3_3_fold
# One-sample t-test on the same 3-fold differences
t.test(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold)
## t = 1.6246, df = 2, p-value = 0.2458
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01425415 0.03154744
## sample estimates:
## mean of x
## 0.008646645
### Test set diff
# Gap between the reference test accuracy and the Node3 model's test accuracy.
# NOTE(review): the reference is svm_cf_ov_acc while the 3-fold comparison uses
# the LR baseline (ad_lr_fit_re) - confirm the SVM accuracy is intended here.
diff_tda_kde_5.50.5_lr.n3_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n3_test
## Accuracy
## 0.01105651
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test (probLeft / probRight)
bst_tda_kde_5.50.5_lr.n3_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n3_test, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the Node3 test-set difference, ROPE bounds -0.01 / 0.01.
# Fixed: this step used to recompute and print bsr_tda_kde_5.50.5_lr.n2_test from
# the Node2 difference, so no Node3 signed-rank result was ever produced.
# NOTE(review): the output recorded directly below was produced before this fix
# (it is the Node2 result); re-run to refresh it.
bsr_tda_kde_5.50.5_lr.n3_test <- BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_lr.n3_test), -0.01, 0.01)
bsr_tda_kde_5.50.5_lr.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test (returns NA for this single-value input, as the output shows)
bct_tda_kde_5.50.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n3_test))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n3_test))
#bf_tda_kde_5.50.5_lr.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n3_test))
## Node 4
# Logistic regression (caret "glm" method, binomial family) on the node-4
# TDA-KDE training subset, resampled via `fitControl` and scored on accuracy.
# NOTE(review): the recorded run below warns about non-convergence and a
# rank-deficient fit (19 coefficients undefined), and per-fold accuracy ranges
# from 0.26 to 0.86, so treat the CV estimate for this node with caution.
Adult_TDA_KDE_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n4_LrFit0
## Generalized Linear Model
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6691, 6693, 6692
## Resampling results:
##
## Accuracy Kappa
## 0.5831072 0.1337313
Adult_TDA_KDE_5.50.5_n4_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.6295190 -0.10484924 Fold1
## 2 0.8597907 0.54704909 Fold2
## 3 0.2600120 -0.04100596 Fold3
# Keep the per-fold Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline, then show the fitted glm summary.
ad_tda_kde_5.50.5_n4_lr_fit_re <- Adult_TDA_KDE_5.50.5_n4_LrFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.50.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (19 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.260e+12 1.517e+13 -0.083 0.933819
## V1 7.282e-02 4.998e-03 14.569 < 2e-16 ***
## V2.. 7.797e+12 2.097e+13 0.372 0.710086
## V2.Federal.gov 7.797e+12 2.096e+13 0.372 0.709945
## V2.Local.gov 7.797e+12 2.098e+13 0.372 0.710183
## V2.Never.worked -4.496e+15 2.096e+13 -214.464 < 2e-16 ***
## V2.Private 7.797e+12 2.097e+13 0.372 0.710089
## V2.Self.emp.inc 7.797e+12 2.095e+13 0.372 0.709801
## V2.Self.emp.not.inc 7.797e+12 2.096e+13 0.372 0.709943
## V2.State.gov 7.797e+12 2.096e+13 0.372 0.709964
## V2.Without.pay -4.496e+15 2.099e+13 -214.207 < 2e-16 ***
## V3 1.580e-06 6.297e-07 2.509 0.012114 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th -2.427e-01 4.229e-01 -0.574 0.566027
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 4.819e-01 1.934e-01 2.491 0.012730 *
## V4.Assoc.voc 4.399e-01 1.318e-01 3.337 0.000846 ***
## V4.Bachelors 1.112e+00 1.107e-01 10.048 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -2.108e-01 8.542e-02 -2.468 0.013604 *
## V4.Masters NA NA NA NA
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -3.636e-01 5.024e-01 -0.724 0.469302
## V6.Married.AF.spouse 2.500e+00 1.009e+00 2.478 0.013212 *
## V6.Married.civ.spouse 1.809e+00 6.901e-01 2.621 0.008767 **
## V6.Married.spouse.absent -1.916e-02 6.769e-01 -0.028 0.977419
## V6.Never.married -4.782e-01 5.110e-01 -0.936 0.349334
## V6.Separated -6.317e-01 5.843e-01 -1.081 0.279634
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 3.182e-01 1.760e-01 1.808 0.070658 .
## V7.Armed.Forces -1.233e+01 4.244e+02 -0.029 0.976824
## V7.Craft.repair 2.127e-01 1.511e-01 1.408 0.159250
## V7.Exec.managerial 9.917e-01 1.593e-01 6.226 4.80e-10 ***
## V7.Farming.fishing -8.439e-01 2.869e-01 -2.942 0.003263 **
## V7.Handlers.cleaners -5.814e-01 2.561e-01 -2.270 0.023184 *
## V7.Machine.op.inspct -7.993e-02 1.823e-01 -0.439 0.660993
## V7.Other.service -3.570e-01 2.137e-01 -1.670 0.094864 .
## V7.Priv.house.serv -1.786e+01 3.461e+03 -0.005 0.995883
## V7.Prof.specialty 8.173e-01 1.771e-01 4.614 3.96e-06 ***
## V7.Protective.serv 9.216e-01 2.343e-01 3.933 8.39e-05 ***
## V7.Sales 6.051e-01 1.617e-01 3.743 0.000182 ***
## V7.Tech.support 8.322e-01 2.095e-01 3.972 7.13e-05 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.545e+00 2.175e-01 -7.105 1.20e-12 ***
## V8.Not.in.family -1.300e+00 5.183e-01 -2.508 0.012158 *
## V8.Other.relative -2.250e+00 5.009e-01 -4.492 7.06e-06 ***
## V8.Own.child -2.508e+00 4.923e-01 -5.093 3.52e-07 ***
## V8.Unmarried -1.627e+00 5.374e-01 -3.027 0.002473 **
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.181e+00 5.091e-01 -2.319 0.020394 *
## V9.Asian.Pac.Islander 4.669e-01 2.799e-01 1.668 0.095275 .
## V9.Black -3.876e-01 1.459e-01 -2.657 0.007890 **
## V9.Other -1.463e+00 6.704e-01 -2.183 0.029036 *
## V9.White NA NA NA NA
## V10.Female -8.349e-01 1.911e-01 -4.370 1.24e-05 ***
## V10.Male NA NA NA NA
## V11 3.266e-04 2.052e-05 15.916 < 2e-16 ***
## V12 5.521e-04 7.073e-05 7.805 5.93e-15 ***
## V13 3.011e-02 3.387e-03 8.890 < 2e-16 ***
## V14.. -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Cambodia -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Canada -6.537e+12 2.170e+13 -0.301 0.763212
## V14.China -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Columbia -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Cuba -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Dominican.Republic -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Ecuador -6.537e+12 2.170e+13 -0.301 0.763212
## V14.El.Salvador -6.537e+12 2.170e+13 -0.301 0.763212
## V14.England -6.537e+12 2.170e+13 -0.301 0.763212
## V14.France -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Germany -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Greece -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Guatemala -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Haiti -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Hong -4.510e+15 2.170e+13 -207.852 < 2e-16 ***
## V14.Hungary -6.537e+12 2.170e+13 -0.301 0.763212
## V14.India -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Iran -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Ireland -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Italy -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Jamaica -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Japan -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Laos -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Mexico -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Nicaragua -4.510e+15 2.170e+13 -207.852 < 2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -4.510e+15 2.170e+13 -207.852 < 2e-16 ***
## V14.Peru -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Philippines -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Poland -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Portugal -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Puerto.Rico -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Scotland -6.537e+12 2.170e+13 -0.301 0.763212
## V14.South -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Taiwan -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Thailand -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Trinadad.Tobago -6.537e+12 2.170e+13 -0.301 0.763212
## V14.United.States -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Vietnam -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Yugoslavia -6.537e+12 2.170e+13 -0.301 0.763212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 10294.3 on 10037 degrees of freedom
## Residual deviance: 5791.2 on 9948 degrees of freedom
## AIC: 5971.2
##
## Number of Fisher Scoring iterations: 25
# Score the held-out test set with the node-4 logistic regression fit
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the true test labels
ad_tda_kde_5.50.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6608 838
## >50K 808 1514
##
## Accuracy : 0.8315
## 95% CI : (0.8239, 0.8389)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5371
##
## Mcnemar's Test P-Value : 0.4747
##
## Sensitivity : 0.8910
## Specificity : 0.6437
## Pos Pred Value : 0.8875
## Neg Pred Value : 0.6520
## Prevalence : 0.7592
## Detection Rate : 0.6765
## Detection Prevalence : 0.7623
## Balanced Accuracy : 0.7674
##
## 'Positive' Class : <=50K
##
# (ad_tda_kde_5.50.5_n4_lr_cf0 is already printed right after it is created
# above; the redundant second print and its identical output were dropped.)
ad_tda_kde_5.50.5_n4_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.314906e-01 5.370929e-01 8.239177e-01 8.388652e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.115193e-68 4.747341e-01
# Overall test accuracy (first element of $overall), kept as a named value
ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n4_lr_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n4_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8910464 0.6437075 0.8874564
## Neg Pred Value Precision Recall
## 0.6520241 0.8874564 0.8910464
## F1 Prevalence Detection Rate
## 0.8892477 0.7592138 0.6764947
## Detection Prevalence Balanced Accuracy
## 0.7622850 0.7673769
# Precision, recall and F1 (elements 5 to 7 of $byClass)
ad_tda_kde_5.50.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests of the non-TDA-assisted LR vs. the TDA-assisted LR classifier
### 3-fold diff: baseline LR fold accuracy minus node-4 TDA-KDE LR fold accuracy
diff_tda_kde_5.50.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n4_lr_fit_re
diff_tda_kde_5.50.5_lr_n4_3_fold
## Accuracy
## 1 0.220967998
## 2 -0.003531683
## 3 0.584420791
## Bayesian tests on the node-4 3-fold accuracy differences
# Bayesian sign test (-0.01 / 0.01 presumably the ROPE bounds; confirm)
bst_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# The ratio is Inf (or NaN) whenever probRight is 0.
bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n4_3_fold, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the node-4 3-fold differences
bsr_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1454333
##
## $winRight
## [1] 0.8545667
# Correlated Bayesian t-test on the node-4 3-fold differences
# (0.1 is presumably the fold correlation; confirm against the function).
bct_tda_kde_5.50.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n4_3_fold
## $left
## [1] 0.1480025
##
## $rope
## [1] 0.01351726
##
## $right
## [1] 0.8384802
# ROPE plot for the node-4 3-fold differences, region [-0.01, 0.01]
plot(rope(diff_tda_kde_5.50.5_lr_n4_3_fold, c(-0.01, 0.01)))

# BayesFactor t-test (disabled)
#bf_tda_kde_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
#bf_tda_kde_5.50.5_lr.n4_3_fold
# Frequentist one-sample t-test of the per-fold accuracy differences against 0
# (only 3 folds, so df = 2 in the recorded output).
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold)
## t = 1.5603, df = 2, p-value = 0.2591
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.4697585 1.0043299
## sample estimates:
## mean of x
## 0.2672857
### Test set diff
# Difference in held-out test accuracy: baseline minus the node-4 TDA-KDE LR model.
# NOTE(review): the baseline used here is `svm_cf_ov_acc` (by its name an SVM
# result), while the 3-fold comparison for this same model subtracts from the
# LR baseline `ad_lr_fit_re`. Confirm whether the LR baseline test accuracy was
# intended; if so, every "_test" result derived from this value is affected.
diff_tda_kde_5.50.5_lr.n4_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc)
diff_tda_kde_5.50.5_lr.n4_test
## Accuracy
## 0.01955364
## Bayesian tests on the node-4 test-set accuracy difference
# Bayesian sign test (-0.01 / 0.01 presumably the ROPE bounds; confirm)
bst_tda_kde_5.50.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# The ratio is Inf (or NaN) whenever probRight is 0.
bst_tda_kde_5.50.5_lr.n4_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n4_test, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the node-4 test-set difference
bsr_tda_kde_5.50.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4607667
##
## $winRight
## [1] 0.5392333
# Correlated Bayesian t-test on the node-4 test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right. The input is a
# single accuracy difference, so presumably no variance can be estimated;
# confirm whether this call is meaningful for one observation.
bct_tda_kde_5.50.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_lr.n4_test, c(-0.01, 0.01)))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n4_test))
#bf_tda_kde_5.50.5_lr.n4_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n4_test))
## Node 5
# Logistic regression (caret "glm" method, binomial family) on the node-5
# TDA-KDE training subset, resampled via `fitControl` and scored on accuracy.
# NOTE(review): the recorded run below warns about non-convergence and a
# rank-deficient fit (23 coefficients undefined); several coefficients are of
# order 1e13 or larger, so the individual estimates are not interpretable.
Adult_TDA_KDE_5.50.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n5_LrFit0
## Generalized Linear Model
##
## 7540 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5027, 5027, 5026
## Resampling results:
##
## Accuracy Kappa
## 0.8665779 0.3834578
Adult_TDA_KDE_5.50.5_n5_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8627139 0.3699141 Fold1
## 2 0.8678870 0.4033372 Fold2
## 3 0.8691329 0.3771220 Fold3
# Keep the per-fold Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline, then show the fitted glm summary.
ad_tda_kde_5.50.5_n5_lr_fit_re <- Adult_TDA_KDE_5.50.5_n5_LrFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.50.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (23 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.531e+13 1.949e+13 -0.786 0.432144
## V1 7.835e-02 6.899e-03 11.357 < 2e-16 ***
## V2.. 1.531e+13 1.949e+13 0.786 0.432144
## V2.Federal.gov 1.531e+13 1.949e+13 0.786 0.432144
## V2.Local.gov 1.531e+13 1.949e+13 0.786 0.432144
## V2.Never.worked -4.488e+15 1.949e+13 -230.295 < 2e-16 ***
## V2.Private 1.531e+13 1.949e+13 0.786 0.432144
## V2.Self.emp.inc 1.531e+13 1.949e+13 0.786 0.432144
## V2.Self.emp.not.inc 1.531e+13 1.949e+13 0.786 0.432144
## V2.State.gov 1.531e+13 1.949e+13 0.786 0.432144
## V2.Without.pay 1.531e+13 1.949e+13 0.786 0.432144
## V3 2.099e-06 9.540e-07 2.200 0.027778 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th NA NA NA NA
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm NA NA NA NA
## V4.Assoc.voc 4.025e-01 1.816e-01 2.216 0.026663 *
## V4.Bachelors NA NA NA NA
## V4.Doctorate NA NA NA NA
## V4.HS.grad -3.119e-01 8.559e-02 -3.644 0.000268 ***
## V4.Masters NA NA NA NA
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.283e-01 1.066e+00 -0.214 0.830365
## V6.Married.AF.spouse 2.486e+00 1.507e+00 1.650 0.098997 .
## V6.Married.civ.spouse 2.141e+00 1.174e+00 1.825 0.068066 .
## V6.Married.spouse.absent -2.802e-01 1.305e+00 -0.215 0.829953
## V6.Never.married -5.760e-01 1.068e+00 -0.539 0.589690
## V6.Separated -4.894e-01 1.139e+00 -0.430 0.667446
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 3.736e-01 2.086e-01 1.791 0.073227 .
## V7.Armed.Forces -2.274e+01 1.525e+05 0.000 0.999881
## V7.Craft.repair 2.768e-01 1.663e-01 1.664 0.096174 .
## V7.Exec.managerial 8.350e-01 1.840e-01 4.537 5.69e-06 ***
## V7.Farming.fishing -3.688e-01 3.161e-01 -1.167 0.243340
## V7.Handlers.cleaners -4.164e-01 2.677e-01 -1.556 0.119827
## V7.Machine.op.inspct 6.750e-02 1.994e-01 0.339 0.734898
## V7.Other.service -3.180e-01 2.459e-01 -1.293 0.196035
## V7.Priv.house.serv -2.393e+01 7.364e+04 0.000 0.999741
## V7.Prof.specialty 1.189e+00 2.334e-01 5.093 3.52e-07 ***
## V7.Protective.serv 8.446e-01 2.740e-01 3.082 0.002056 **
## V7.Sales 4.039e-01 1.859e-01 2.173 0.029765 *
## V7.Tech.support 1.113e+00 2.595e-01 4.291 1.78e-05 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.278e+00 3.035e-01 -4.212 2.53e-05 ***
## V8.Not.in.family -7.738e-01 6.013e-01 -1.287 0.198168
## V8.Other.relative -1.731e+00 6.119e-01 -2.830 0.004662 **
## V8.Own.child -1.756e+00 5.661e-01 -3.101 0.001926 **
## V8.Unmarried -1.727e+00 6.715e-01 -2.573 0.010095 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.486e+00 5.768e-01 -2.576 0.009981 **
## V9.Asian.Pac.Islander 5.260e-01 4.353e-01 1.208 0.226894
## V9.Black -5.663e-01 1.998e-01 -2.834 0.004591 **
## V9.Other -2.357e+00 1.110e+00 -2.124 0.033646 *
## V9.White NA NA NA NA
## V10.Female -6.602e-01 2.631e-01 -2.509 0.012094 *
## V10.Male NA NA NA NA
## V11 3.697e-04 2.660e-05 13.899 < 2e-16 ***
## V12 5.408e-04 8.660e-05 6.244 4.26e-10 ***
## V13 2.854e-02 4.059e-03 7.032 2.04e-12 ***
## V14.. -1.407e+00 1.435e+00 -0.980 0.326858
## V14.Cambodia -2.607e+01 1.507e+05 0.000 0.999862
## V14.Canada -1.840e+00 1.800e+00 -1.022 0.306601
## V14.China -1.393e+00 1.707e+00 -0.816 0.414689
## V14.Columbia -2.562e+01 8.344e+04 0.000 0.999755
## V14.Cuba 3.551e-01 1.558e+00 0.228 0.819681
## V14.Dominican.Republic -1.727e+01 1.399e+03 -0.012 0.990148
## V14.Ecuador -1.909e-03 1.691e+00 -0.001 0.999099
## V14.El.Salvador -2.776e+00 1.783e+00 -1.557 0.119535
## V14.England -6.973e-01 1.660e+00 -0.420 0.674469
## V14.France -2.309e+01 1.625e+05 0.000 0.999887
## V14.Germany -8.534e-01 1.495e+00 -0.571 0.568216
## V14.Greece -1.636e+00 2.115e+00 -0.774 0.439051
## V14.Guatemala -3.395e-01 1.813e+00 -0.187 0.851448
## V14.Haiti -2.321e+01 1.092e+05 0.000 0.999830
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.470e+01 2.452e+05 0.000 0.999920
## V14.Hong -2.668e+01 1.398e+05 0.000 0.999848
## V14.Hungary -2.539e+01 3.120e+05 0.000 0.999935
## V14.India -2.833e+00 2.209e+00 -1.282 0.199724
## V14.Iran 2.252e-01 1.758e+00 0.128 0.898083
## V14.Ireland -1.216e+00 1.790e+00 -0.679 0.496897
## V14.Italy -2.567e+00 1.866e+00 -1.376 0.168826
## V14.Jamaica -1.321e-01 1.564e+00 -0.084 0.932709
## V14.Japan -2.613e+01 8.646e+04 0.000 0.999759
## V14.Laos -2.666e+01 1.732e+05 0.000 0.999877
## V14.Mexico -1.881e+00 1.498e+00 -1.255 0.209357
## V14.Nicaragua -2.527e+01 1.007e+05 0.000 0.999800
## V14.Outlying.US.Guam.USVI.etc. -2.399e+01 1.271e+05 0.000 0.999849
## V14.Peru -2.576e+01 1.247e+05 0.000 0.999835
## V14.Philippines -7.225e-01 1.581e+00 -0.457 0.647625
## V14.Poland -1.867e+00 1.771e+00 -1.054 0.291869
## V14.Portugal -6.629e-01 1.856e+00 -0.357 0.720983
## V14.Puerto.Rico -2.366e+00 1.781e+00 -1.329 0.183994
## V14.Scotland -2.715e+01 2.557e+05 0.000 0.999915
## V14.South -1.428e+00 1.710e+00 -0.835 0.403544
## V14.Taiwan -2.466e+00 1.940e+00 -1.271 0.203759
## V14.Thailand -2.201e+00 2.037e+00 -1.080 0.280071
## V14.Trinadad.Tobago -2.743e+01 2.557e+05 0.000 0.999914
## V14.United.States -1.026e+00 1.386e+00 -0.740 0.459267
## V14.Vietnam -2.153e+00 1.842e+00 -1.169 0.242491
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6474.2 on 7539 degrees of freedom
## Residual deviance: 4003.6 on 7454 degrees of freedom
## AIC: 4175.6
##
## Number of Fisher Scoring iterations: 25
# Score the held-out test set with the node-5 logistic regression fit
pred0 <- predict(Adult_TDA_KDE_5.50.5_n5_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the true test labels
ad_tda_kde_5.50.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6514 877
## >50K 902 1475
##
## Accuracy : 0.8179
## 95% CI : (0.8101, 0.8255)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5037
##
## Mcnemar's Test P-Value : 0.5693
##
## Sensitivity : 0.8784
## Specificity : 0.6271
## Pos Pred Value : 0.8813
## Neg Pred Value : 0.6205
## Prevalence : 0.7592
## Detection Rate : 0.6669
## Detection Prevalence : 0.7567
## Balanced Accuracy : 0.7527
##
## 'Positive' Class : <=50K
##
# (ad_tda_kde_5.50.5_n5_lr_cf0 is already printed right after it is created
# above; the redundant second print and its identical output were dropped.)
ad_tda_kde_5.50.5_n5_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.178747e-01 5.036691e-01 8.100754e-01 8.254840e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.100498e-44 5.693464e-01
# Overall test accuracy (first element of $overall), kept as a named value
ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n5_lr_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n5_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8783711 0.6271259 0.8813422
## Neg Pred Value Precision Recall
## 0.6205301 0.8813422 0.8783711
## F1 Prevalence Detection Rate
## 0.8798541 0.7592138 0.6668714
## Detection Prevalence Balanced Accuracy
## 0.7566544 0.7527485
# Precision, recall and F1 (elements 5 to 7 of $byClass)
ad_tda_kde_5.50.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests of the non-TDA-assisted LR vs. the TDA-assisted LR classifier
### 3-fold diff: baseline LR fold accuracy minus node-5 TDA-KDE LR fold accuracy
diff_tda_kde_5.50.5_lr_n5_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n5_lr_fit_re
diff_tda_kde_5.50.5_lr_n5_3_fold
## Accuracy
## 1 -0.01222692
## 2 -0.01162794
## 3 -0.02470011
## Bayesian tests on the node-5 3-fold accuracy differences
# Bayesian sign test (-0.01 / 0.01 presumably the ROPE bounds; confirm)
bst_tda_kde_5.50.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# The ratio is Inf whenever probRight is 0, as in the recorded run below.
bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n5_3_fold, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-5 3-fold differences
bsr_tda_kde_5.50.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.9083
##
## $winRope
## [1] 0.0917
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-5 3-fold differences
# (0.1 is presumably the fold correlation; confirm against the function).
bct_tda_kde_5.50.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n5_3_fold
## $left
## [1] 0.8321769
##
## $rope
## [1] 0.1510523
##
## $right
## [1] 0.01677077
# ROPE plot for the node-5 3-fold differences, region [-0.01, 0.01]
plot(rope(diff_tda_kde_5.50.5_lr_n5_3_fold, c(-0.01, 0.01)))

# BayesFactor t-test (disabled)
#bf_tda_kde_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
#bf_tda_kde_5.50.5_lr.n5_3_fold
# Frequentist one-sample t-test of the per-fold accuracy differences against 0
# (only 3 folds, so df = 2 in the recorded output).
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold)
## t = -3.7983, df = 2, p-value = 0.06285
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.034518896 0.002148919
## sample estimates:
## mean of x
## -0.01618499
### Test set diff
# Difference in held-out test accuracy: baseline minus the node-5 TDA-KDE LR model.
# NOTE(review): the baseline used here is `svm_cf_ov_acc` (by its name an SVM
# result), while the 3-fold comparison for this same model subtracts from the
# LR baseline `ad_lr_fit_re`. Confirm whether the LR baseline test accuracy was
# intended; if so, every "_test" result derived from this value is affected.
diff_tda_kde_5.50.5_lr.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc)
diff_tda_kde_5.50.5_lr.n5_test
## Accuracy
## 0.03316953
## Bayesian tests on the node-5 test-set accuracy difference
# Bayesian sign test (-0.01 / 0.01 presumably the ROPE bounds; confirm)
bst_tda_kde_5.50.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# The ratio is Inf (or NaN) whenever probRight is 0.
bst_tda_kde_5.50.5_lr.n5_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n5_test, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the node-5 test-set difference
bsr_tda_kde_5.50.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1622
##
## $winRight
## [1] 0.8378
# Correlated Bayesian t-test on the node-5 test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right. The input is a
# single accuracy difference, so presumably no variance can be estimated;
# confirm whether this call is meaningful for one observation.
bct_tda_kde_5.50.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_lr.n5_test, c(-0.01, 0.01)))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n5_test))
#bf_tda_kde_5.50.5_lr.n5_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n5_test))
# Naive Bayes baseline (caret "nb" method) on the full one-hot training set,
# resampled via `fitControl` and scored on accuracy.
# NOTE(review): in the recorded run below the usekernel = FALSE candidate fails
# in every fold ("Zero variances for at least one class"), so only the
# usekernel = TRUE candidate produces results. Consider dropping zero-variance
# predictors upstream; changing the tuning grid here may alter the random
# stream seen by later fits, so re-check downstream results if you do.
adultNbFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Cambodia, V14.Holand.Netherlands, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
adultNbFit
## Naive Bayes
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7648399 0.03456348
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
adultNbFit$resample
## Accuracy Kappa Resample
## 1 0.7603001 0.006626826 Fold1
## 2 0.7750724 0.097063606 Fold2
## 3 0.7591471 0.000000000 Fold3
# Keep the per-fold Accuracy column (still a one-column data frame) of the
# naive Bayes baseline, then list the components of the fitted model.
ad_nb_fit_re <- adultNbFit$resample["Accuracy"]
summary(adultNbFit)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
#varImp (adultNbFit)
# Predict outcome using model from training data based on testing data
predictions <- predict(adultNbFit, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
Nb_cf<-confusionMatrix(data=predictions, as.factor(adult.one_hot_df4Test$adult_df1))
Nb_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2223
## >50K 0 129
##
## Accuracy : 0.7724
## 95% CI : (0.764, 0.7807)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.001113
##
## Kappa : 0.081
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.05485
## Pos Pred Value : 0.76937
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98679
## Balanced Accuracy : 0.52742
##
## 'Positive' Class : <=50K
##
Nb_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.772420147 0.080978544 0.763973853 0.780703712 0.759213759
## AccuracyPValue McnemarPValue
## 0.001113075 0.000000000
nb_cf_ov_acc<-Nb_cf$overall[1]
Nb_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 1.00000000 0.05484694 0.76937442
## Neg Pred Value Precision Recall
## 1.00000000 0.76937442 1.00000000
## F1 Prevalence Detection Rate
## 0.86965699 0.75921376 0.75921376
## Detection Prevalence Balanced Accuracy
## 0.98679361 0.52742347
nb_cf_pre_rec_f1<-Nb_cf$byClass[5:7]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit a naive Bayes classifier (caret method 'nb') on tda.m_adult_5.50.5.n1.vec
# (presumably the observations of TDA mapper node 1), resampled per fitControl
# and selected on accuracy.
# No tuneGrid is given, so caret tries both usekernel = FALSE and TRUE. The
# usekernel = FALSE fits fail in every fold with "Zero variances for at least
# one class" (see the warnings below), so that row of the results table is NaN
# and the usekernel = TRUE model is the one selected.
Adult_TDA_PC_5.50.5_n1_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n1.vec,
method = 'nb',
trControl = fitControl,
metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n1_NbFit0
## Naive Bayes
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3277, 3278, 3279
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9733579 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9731707 0 Fold1
## 2 0.9731544 0 Fold2
## 3 0.9737485 0 Fold3
ad_tda_pc_5.50.5_n1_nb_fit_re<-Adult_TDA_PC_5.50.5_n1_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Predict outcome using Adult_TDA_PC_5.50.5_n1_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_NbFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n1_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_nb_cf0$overall[1]
# Per-class metrics (sensitivity, specificity, precision, recall, F1, ...) of
# the node-1 model on the test set. The element is named `byClass`; a
# misspelled name after `$` matches nothing and silently yields NULL.
# Re-knit to regenerate the printed output for this call.
ad_tda_pc_5.50.5_n1_nb_cf0$byClass
ad_tda_pc_5.50.5_n1_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes vs. tda-assisted naive Bayes classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n1_nb_fit_re)
diff_tda_pca_5.50.5_nb_n1_3_fold
## Accuracy
## 1 -0.2128706
## 2 -0.1980820
## 3 -0.2146013
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n1_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n1_3_fold$probRight
bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0.9917333
##
## $winRope
## [1] 0.008266667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n1_3_fold
## $left
## [1] 0.9995358
##
## $rope
## [1] 8.098509e-05
##
## $right
## [1] 0.000383183
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
#bf_tda_pca_5.50.5_nb.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold)
## t = -39.779, df = 2, p-value = 0.0006314
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2310719 -0.1859641
## sample estimates:
## mean of x
## -0.208518
### Test set diff
diff_tda_pca_5.50.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n1_test
## Accuracy
## 0.5316339
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n1_test_odds.left<-bst_tda_pca_5.50.5_nb.n1_test$probLeft/bst_tda_pca_5.50.5_nb.n1_test$probRight
bst_tda_pca_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1610333
##
## $winRight
## [1] 0.8389667
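# Bayesian Correlated Test (returns NA here, presumably because a single test-set difference has no sample standard deviation)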
bct_tda_pca_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n1_test))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n1_test)) #bf_tda_pca_5.50.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n1_test))
##Node2
# Fit a naive Bayes classifier (caret method 'nb') on tda.m_adult_5.50.5.n2.vec
# (presumably the observations of TDA mapper node 2), resampled per fitControl
# and selected on accuracy.
# No tuneGrid is given, so caret tries both usekernel = FALSE and TRUE. The
# usekernel = FALSE fits fail in every fold with "Zero variances for at least
# one class" (see the warnings below), so that row of the results table is NaN
# and the usekernel = TRUE model is the one selected.
Adult_TDA_PC_5.50.5_n2_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec,
method = 'nb',
trControl = fitControl,
metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n2_NbFit0
## Naive Bayes
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8138, 8137, 8137
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.5670992 0.1900125
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.5796460 0.2117894 Fold1
## 2 0.5785205 0.2093729 Fold2
## 3 0.5431310 0.1488753 Fold3
ad_tda_pc_5.50.5_n2_nb_fit_re<-Adult_TDA_PC_5.50.5_n2_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Predict outcome using Adult_TDA_PC_5.50.5_n2_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_NbFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n2_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3584 1595
## >50K 3832 757
##
## Accuracy : 0.4444
## 95% CI : (0.4345, 0.4543)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1471
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4833
## Specificity : 0.3219
## Pos Pred Value : 0.6920
## Neg Pred Value : 0.1650
## Prevalence : 0.7592
## Detection Rate : 0.3669
## Detection Prevalence : 0.5302
## Balanced Accuracy : 0.4026
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3584 1595
## >50K 3832 757
##
## Accuracy : 0.4444
## 95% CI : (0.4345, 0.4543)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1471
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4833
## Specificity : 0.3219
## Pos Pred Value : 0.6920
## Neg Pred Value : 0.1650
## Prevalence : 0.7592
## Detection Rate : 0.3669
## Detection Prevalence : 0.5302
## Balanced Accuracy : 0.4026
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.444103e-01 -1.470993e-01 4.345229e-01 4.543309e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 2.341485e-202
ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.4832794 0.3218537 0.6920255
## Neg Pred Value Precision Recall
## 0.1649597 0.6920255 0.4832794
## F1 Prevalence Detection Rate
## 0.5691147 0.7592138 0.3669124
## Detection Prevalence Balanced Accuracy
## 0.5302007 0.4025666
ad_tda_pc_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes vs. tda-assisted naive Bayes classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n2_nb_fit_re)
diff_tda_pca_5.50.5_nb_n2_3_fold
## Accuracy
## 1 0.1806541
## 2 0.1965519
## 3 0.2160162
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n2_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n2_3_fold$probRight
bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0089
##
## $winRight
## [1] 0.9911
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_3_fold
## $left
## [1] 0.001607426
##
## $rope
## [1] 0.000358595
##
## $right
## [1] 0.998034
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
#bf_tda_pca_5.50.5_nb.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold)
## t = 19.338, df = 2, p-value = 0.002663
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1537442 0.2417372
## sample estimates:
## mean of x
## 0.1977407
### Test set diff
diff_tda_pca_5.50.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n2_test
## Accuracy
## 0.3280098
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_test_odds.left<-bst_tda_pca_5.50.5_nb.n2_test$probLeft/bst_tda_pca_5.50.5_nb.n2_test$probRight
bst_tda_pca_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1575
##
## $winRight
## [1] 0.8425
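# Bayesian Correlated Test (returns NA here, presumably because a single test-set difference has no sample standard deviation)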
bct_tda_pca_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n2_test))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n2_test)) #bf_tda_pca_5.50.5_nb.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n2_test))
##Node3
# Fit a naive Bayes classifier (caret method 'nb') on tda.m_adult_5.50.5.n3.vec
# (presumably the observations of TDA mapper node 3), resampled per fitControl
# and selected on accuracy.
# No tuneGrid is given, so caret tries both usekernel = FALSE and TRUE. The
# usekernel = FALSE fits fail in every fold with "Zero variances for at least
# one class" (see the warnings below), so that row of the results table is NaN
# and the usekernel = TRUE model is the one selected.
Adult_TDA_PC_5.50.5_n3_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n3.vec,
method = 'nb',
trControl = fitControl,
metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Dominican.Republic, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Portugal
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n3_NbFit0
## Naive Bayes
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8826, 8827
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7723564 0.006076996
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n3_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7715840 0.00000000 Fold1
## 2 0.7741278 0.01823099 Fold2
## 3 0.7713574 0.00000000 Fold3
ad_tda_pc_5.50.5_n3_nb_fit_re<-Adult_TDA_PC_5.50.5_n3_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Predict outcome using Adult_TDA_PC_5.50.5_n3_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_NbFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n3_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2348
## >50K 0 4
##
## Accuracy : 0.7596
## 95% CI : (0.751, 0.7681)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.4678
##
## Kappa : 0.0026
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.001701
## Pos Pred Value : 0.759525
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.999590
## Balanced Accuracy : 0.500850
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2348
## >50K 0 4
##
## Accuracy : 0.7596
## 95% CI : (0.751, 0.7681)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.4678
##
## Kappa : 0.0026
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.001701
## Pos Pred Value : 0.759525
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.999590
## Balanced Accuracy : 0.500850
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.759623260 0.002580085 0.751021356 0.768070101 0.759213759
## AccuracyPValue McnemarPValue
## 0.467802791 0.000000000
ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n3_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n3_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.00000000 0.00170068 0.75952478
## Neg Pred Value Precision Recall
## 1.00000000 0.75952478 1.00000000
## F1 Prevalence Detection Rate
## 0.86332945 0.75921376 0.75921376
## Detection Prevalence Balanced Accuracy
## 0.99959050 0.50085034
ad_tda_pc_5.50.5_n3_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n3_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes vs. tda-assisted naive Bayes classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n3_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n3_nb_fit_re)
diff_tda_pca_5.50.5_nb_n3_3_fold
## Accuracy
## 1 -0.0112838380
## 2 0.0009446122
## 3 -0.0122102093
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n3_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n3_3_fold$probRight
bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.3229667
##
## $winRope
## [1] 0.6770333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_3_fold
## $left
## [1] 0.331151
##
## $rope
## [1] 0.6338555
##
## $right
## [1] 0.03499351
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
#bf_tda_pca_5.50.5_nb.n3_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold)
## t = -1.7732, df = 2, p-value = 0.2182
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02575538 0.01072242
## sample estimates:
## mean of x
## -0.007516478
### Test set diff
diff_tda_pca_5.50.5_nb.n3_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n3_test
## Accuracy
## 0.01279689
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_test_odds.left<-bst_tda_pca_5.50.5_nb.n3_test$probLeft/bst_tda_pca_5.50.5_nb.n3_test$probRight
bst_tda_pca_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the node-3 test-set accuracy difference
# (baseline naive Bayes minus node-3 model), with bounds -0.01 and 0.01.
# The result is stored under the n3 name so the node-2 result computed
# earlier is not overwritten.
# NOTE: the printed output that follows was produced from the node-2
# difference; re-knit to refresh it.
bsr_tda_pca_5.50.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1586333
##
## $winRight
## [1] 0.8413667
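# Bayesian Correlated Test (returns NA here, presumably because a single test-set difference has no sample standard deviation)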
bct_tda_pca_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n3_test))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n3_test)) #bf_tda_pca_5.50.5_nb.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n3_test))
##Node4
# Fit a naive Bayes classifier (caret method "nb") to the Node 4 data set,
# using the shared `fitControl` resampling settings and selecting on Accuracy.
Adult_TDA_PC_5.50.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.India, V14.Iran, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Jamaica, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n4_NbFit0
## Naive Bayes
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11133, 11133, 11134
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9449102 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n4_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9448536 0 Fold1
## 2 0.9448536 0 Fold2
## 3 0.9450234 0 Fold3
ad_tda_pc_5.50.5_n4_nb_fit_re<-Adult_TDA_PC_5.50.5_n4_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test set with the Node 4 naive Bayes model
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the true test labels and print the summary
ad_tda_pc_5.50.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n4_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n4_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_pc_5.50.5_n4_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n4_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n4_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n4_nb_fit_re)
diff_tda_pca_5.50.5_nb_n4_3_fold
## Accuracy
## 1 -0.1845535
## 2 -0.1697812
## 3 -0.1858762
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test above. probRight is 0 for
# this comparison, so the ratio evaluates to Inf rather than a finite odds value.
bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n4_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n4_3_fold$probRight
bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9902
##
## $winRope
## [1] 0.0098
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9993877
##
## $rope
## [1] 0.0001218899
##
## $right
## [1] 0.0004903664
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
#bf_tda_pca_5.50.5_nb.n4_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold)
## t = -34.906, df = 2, p-value = 0.0008197
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2022664 -0.1578742
## sample estimates:
## mean of x
## -0.1800703
### Test set diff
diff_tda_pca_5.50.5_nb.n4_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n4_test
## Accuracy
## 0.01320639
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n4_test_odds.left<-bst_tda_pca_5.50.5_nb.n4_test$probLeft/bst_tda_pca_5.50.5_nb.n4_test$probRight
bst_tda_pca_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4594667
##
## $winRight
## [1] 0.5405333
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n4_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n4_test)) #bf_tda_pca_5.50.5_nb.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n4_test))
##Node5
# Fit a naive Bayes classifier (caret method "nb") to the Node 5 data set,
# using the shared `fitControl` resampling settings and selecting on Accuracy.
Adult_TDA_PC_5.50.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.voc, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n5_NbFit0
## Naive Bayes
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9602, 9604, 9602
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9979867 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n5_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9979175 0 Fold1
## 2 0.9981250 0 Fold2
## 3 0.9979175 0 Fold3
ad_tda_pc_5.50.5_n5_nb_fit_re<-Adult_TDA_PC_5.50.5_n5_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test set with the Node 5 naive Bayes model
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the true test labels and print the summary
ad_tda_pc_5.50.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n5_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n5_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_pc_5.50.5_n5_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n5_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n5_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n5_nb_fit_re)
diff_tda_pca_5.50.5_nb_n5_3_fold
## Accuracy
## 1 -0.2376174
## 2 -0.2230526
## 3 -0.2387704
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n5_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n5_3_fold$probRight
bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Node 5 per-fold accuracy differences with ROPE bounds -0.01 / 0.01.
# The original call passed the Node 4 differences (diff_tda_pca_5.50.5_nb_n4_3_fold),
# so the stored Node 5 result actually described Node 4.
# Re-knit to refresh the printed output below.
bsr_tda_pca_5.50.5_nb.n5_3_fold <- BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold), -0.01, 0.01)
bsr_tda_pca_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9917
##
## $winRope
## [1] 0.0083
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9996578
##
## $rope
## [1] 5.392945e-05
##
## $right
## [1] 0.0002882463
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold))
#bf_tda_pca_5.50.5_nb.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold)
## t = -46.094, df = 2, p-value = 0.0004703
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2549099 -0.2113837
## sample estimates:
## mean of x
## -0.2331468
### Test set diff
diff_tda_pca_5.50.5_nb.n5_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n5_test
## Accuracy
## 0.01320639
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_test_odds.left<-bst_tda_pca_5.50.5_nb.n5_test$probLeft/bst_tda_pca_5.50.5_nb.n5_test$probRight
bst_tda_pca_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4610667
##
## $winRight
## [1] 0.5389333
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n5_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n5_test)) #bf_tda_pca_5.50.5_nb.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit a naive Bayes classifier (caret method "nb") to the KDE-filter Node 1
# data set, using the shared `fitControl` settings and selecting on Accuracy.
Adult_TDA_KDE_5.50.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n1_NbFit0
## Naive Bayes
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8924, 8925, 8925
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7541639 0.0741204
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.50.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7613713 0.11345994 Fold1
## 2 0.7449574 0.02413639 Fold2
## 3 0.7561632 0.08476488 Fold3
ad_tda_kde_5.50.5_n1_nb_fit_re<-Adult_TDA_KDE_5.50.5_n1_NbFit0$resample[1]
summary(Adult_TDA_KDE_5.50.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test set with the KDE Node 1 naive Bayes model
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the true test labels and print the summary
ad_tda_kde_5.50.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2195
## >50K 0 157
##
## Accuracy : 0.7753
## 95% CI : (0.7669, 0.7835)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 9.556e-05
##
## Kappa : 0.098
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.06675
## Pos Pred Value : 0.77162
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98393
## Balanced Accuracy : 0.53338
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2195
## >50K 0 157
##
## Accuracy : 0.7753
## 95% CI : (0.7669, 0.7835)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 9.556e-05
##
## Kappa : 0.098
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.06675
## Pos Pred Value : 0.77162
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98393
## Balanced Accuracy : 0.53338
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.752867e-01 9.796737e-02 7.668767e-01 7.835322e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.556408e-05 0.000000e+00
ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_nb_cf0$overall[1]
# Print the per-class metrics. The element was misspelled `byClas1`, which does
# not exist on the confusionMatrix object and printed NULL; re-knit to refresh.
ad_tda_kde_5.50.5_n1_nb_cf0$byClass
## NULL
ad_tda_kde_5.50.5_n1_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
diff_tda_kde_5.50.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n1_nb_fit_re)
diff_tda_kde_5.50.5_nb_n1_3_fold
## Accuracy
## 1 -0.001071156
## 2 0.030114969
## 3 0.002983988
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n1_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n1_3_fold$probRight
bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.5771333
##
## $winRight
## [1] 0.4228667
# Bayesian Correlated Test
bct_tda_kde_5.50.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_3_fold
## $left
## [1] 0.1044478
##
## $rope
## [1] 0.3744305
##
## $right
## [1] 0.5211218
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
#bf_tda_kde_5.50.5_nb.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold)
## t = 1.0905, df = 2, p-value = 0.3894
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03144600 0.05279787
## sample estimates:
## mean of x
## 0.01067593
### Test set diff
# Difference between the non-TDA naive Bayes test accuracy and the KDE Node 1
# naive Bayes test accuracy. The original used svm_cf_ov_acc as the baseline,
# which compares an SVM against an NB model; the matching 3-fold diff above and
# the other NB test-set diffs in this file use the NB baseline (nb_cf_ov_acc).
# Re-knit to refresh the printed value and the test outputs that follow.
diff_tda_kde_5.50.5_nb.n1_test <- (nb_cf_ov_acc - ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n1_test
## Accuracy
## 0.07575758
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_test_odds.left<-bst_tda_kde_5.50.5_nb.n1_test$probLeft/bst_tda_kde_5.50.5_nb.n1_test$probRight
bst_tda_kde_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1569333
##
## $winRight
## [1] 0.8430667
# Bayesian Correlated Test
bct_tda_kde_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n1_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n1_test))
#bf_tda_kde_5.50.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n1_test))
##Node2
# Fit a naive Bayes classifier (caret method "nb") to the KDE-filter Node 2
# data set, using the shared `fitControl` settings and selecting on Accuracy.
# The original call trained on tda.m_adult_5.50.5.n2.vec, the name pattern used
# by the PCA-filter sections, although this model belongs to the KDE section
# (Node 1 here uses tda.m_kde_adult_5.50.5.n1.vec).
# NOTE(review): assumes tda.m_kde_adult_5.50.5.n2.vec is created alongside the
# Node 1 KDE object -- confirm, then re-knit to refresh all Node 2 KDE output.
Adult_TDA_KDE_5.50.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Cambodia, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n2_NbFit0
## Naive Bayes
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8138, 8138, 8136
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.6282283 0.2926769
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.50.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7082104 0.4265867 Fold1
## 2 0.6258604 0.2898944 Fold2
## 3 0.5506143 0.1615495 Fold3
ad_tda_kde_5.50.5_n2_nb_fit_re<-Adult_TDA_KDE_5.50.5_n2_NbFit0$resample[1]
summary(Adult_TDA_KDE_5.50.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test set with the KDE Node 2 naive Bayes model
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the true test labels and print the summary
ad_tda_kde_5.50.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3584 1595
## >50K 3832 757
##
## Accuracy : 0.4444
## 95% CI : (0.4345, 0.4543)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1471
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4833
## Specificity : 0.3219
## Pos Pred Value : 0.6920
## Neg Pred Value : 0.1650
## Prevalence : 0.7592
## Detection Rate : 0.3669
## Detection Prevalence : 0.5302
## Balanced Accuracy : 0.4026
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3584 1595
## >50K 3832 757
##
## Accuracy : 0.4444
## 95% CI : (0.4345, 0.4543)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1471
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4833
## Specificity : 0.3219
## Pos Pred Value : 0.6920
## Neg Pred Value : 0.1650
## Prevalence : 0.7592
## Detection Rate : 0.3669
## Detection Prevalence : 0.5302
## Balanced Accuracy : 0.4026
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.444103e-01 -1.470993e-01 4.345229e-01 4.543309e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 2.341485e-202
ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_nb_cf0$overall[1]
ad_tda_kde_5.50.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.4832794 0.3218537 0.6920255
## Neg Pred Value Precision Recall
## 0.1649597 0.6920255 0.4832794
## F1 Prevalence Detection Rate
## 0.5691147 0.7592138 0.3669124
## Detection Prevalence Balanced Accuracy
## 0.5302007 0.4025666
ad_tda_kde_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
diff_tda_kde_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n2_nb_fit_re)
diff_tda_kde_5.50.5_nb_n2_3_fold
## Accuracy
## 1 0.0520897
## 2 0.1492120
## 3 0.2085329
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n2_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n2_3_fold$probRight
bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0098
##
## $winRight
## [1] 0.9902
# Bayesian Correlated Test
bct_tda_kde_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_3_fold
## $left
## [1] 0.05420179
##
## $rope
## [1] 0.01480854
##
## $right
## [1] 0.9309897
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_nb_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (disabled):
# bf_tda_kde_5.50.5_nb.n2_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
# bf_tda_kde_5.50.5_nb.n2_3_fold
# One-sample t-test of the fold differences (default null: mean equal to 0).
t.test(x = as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold)
## t = 2.996, df = 2, p-value = 0.09569
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.05958342 0.33280648
## sample estimates:
## mean of x
## 0.1366115
### Test set diff
# Difference in overall test-set accuracy: `svm_cf_ov_acc` minus the n2 value.
# NOTE(review): the 3-fold comparison for this node subtracts from
# `ad_nb_fit_re`, but this one subtracts from `svm_cf_ov_acc`; by its name that
# looks like an SVM result rather than a naive Bayes one — confirm the baseline.
diff_tda_kde_5.50.5_nb.n2_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n2_test
## Accuracy
## 0.4066339
## Bayesian Tests Test set diff
# NOTE(review): the difference is a single value, so every test below is run
# on one observation.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_test_odds.left<-bst_tda_kde_5.50.5_nb.n2_test$probLeft/bst_tda_kde_5.50.5_nb.n2_test$probRight
bst_tda_kde_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1568333
##
## $winRight
## [1] 0.8431667
# Bayesian Correlated Test
# All three components are NA in the recorded output, presumably because a
# spread cannot be estimated from one value.
bct_tda_kde_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n2_test))
#BayesFactor
#bf_tda_kde_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n2_test)) #bf_tda_kde_5.50.5_nb.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n2_test))
##Node3
# Fit a naive Bayes classifier ("nb") on `tda.m_adult_5.50.5.n3.vec`, resampled
# as configured in `fitControl` and tuned for accuracy.
# NOTE(review): the recorded warnings below show the usekernel = FALSE
# candidate failing in every fold (zero-variance predictors), so only the
# usekernel = TRUE candidate is ever scored.
Adult_TDA_KDE_5.50.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted model's resampling summary.
# NOTE(review): the recorded output reports Kappa = 0 for the selected
# candidate — check whether the model predicts a single class only.
Adult_TDA_KDE_5.50.5_n3_NbFit0
## Naive Bayes
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8828, 8826, 8826
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7714502 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold resampling results of the selected n3 model.
Adult_TDA_KDE_5.50.5_n3_NbFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7715322 0 Fold1
## 2 0.7714092 0 Fold2
## 3 0.7714092 0 Fold3
# Keep the per-fold accuracies as a one-column data frame, picked by name.
ad_tda_kde_5.50.5_n3_nb_fit_re <-
  Adult_TDA_KDE_5.50.5_n3_NbFit0[["resample"]]["Accuracy"]
# Structural summary of the fitted object.
summary(Adult_TDA_KDE_5.50.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the rows of `adult.one_hot_df4Test` with the n3 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_kde_5.50.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2348
## >50K 0 4
##
## Accuracy : 0.7596
## 95% CI : (0.751, 0.7681)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.4678
##
## Kappa : 0.0026
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.001701
## Pos Pred Value : 0.759525
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.999590
## Balanced Accuracy : 0.500850
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the recorded output
# is identical to the print directly above.
ad_tda_kde_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2348
## >50K 0 4
##
## Accuracy : 0.7596
## 95% CI : (0.751, 0.7681)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.4678
##
## Kappa : 0.0026
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.001701
## Pos Pred Value : 0.759525
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.999590
## Balanced Accuracy : 0.500850
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics stored in the n3 confusion-matrix object.
ad_tda_kde_5.50.5_n3_nb_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.759623260 0.002580085 0.751021356 0.768070101 0.759213759
## AccuracyPValue McnemarPValue
## 0.467802791 0.000000000
# Keep the overall accuracy, selected by name instead of by position.
ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_nb_cf0[["overall"]]["Accuracy"]
# Per-class statistics from the same object.
ad_tda_kde_5.50.5_n3_nb_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.00000000 0.00170068 0.75952478
## Neg Pred Value Precision Recall
## 1.00000000 0.75952478 1.00000000
## F1 Prevalence Detection Rate
## 0.86332945 0.75921376 0.75921376
## Detection Prevalence Balanced Accuracy
## 0.99959050 0.50085034
# Keep precision, recall and F1, selected by name instead of by position.
ad_tda_kde_5.50.5_n3_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_nb_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: `ad_nb_fit_re` minus the n3 TDA/KDE fit, so
# positive values favour `ad_nb_fit_re` (presumably the non-TDA naive Bayes
# baseline — TODO confirm).
diff_tda_kde_5.50.5_nb_n3_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n3_nb_fit_re)
diff_tda_kde_5.50.5_nb_n3_3_fold
## Accuracy
## 1 -0.011232066
## 2 0.003663235
## 3 -0.012262009
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# -0.01 and 0.01 are the second and third arguments, presumably the ROPE
# bounds (the result carries a `probRope` component).
bst_tda_kde_5.50.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; probRight is 0 here, hence the recorded Inf.
bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n3_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n3_3_fold$probRight
bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# NOTE(review): the recorded values look sampled (presumably Monte Carlo), so a
# re-run may not reproduce these digits exactly — TODO confirm.
bsr_tda_kde_5.50.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.3242667
##
## $winRope
## [1] 0.6757333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold accuracy differences.
# The second argument is the correlation between cross-validation results,
# which the correlated t-test sets to 1 / (number of folds). These differences
# come from 3-fold resampling, so 1 / 3 is used instead of 0.1 (the value that
# corresponds to 10 folds).
# NOTE(review): the recorded output that follows was produced with 0.1 and has
# to be regenerated by re-knitting.
bct_tda_kde_5.50.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nb.n3_3_fold
## $left
## [1] 0.3129359
##
## $rope
## [1] 0.6332286
##
## $right
## [1] 0.05383551
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_nb_n3_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (disabled):
# bf_tda_kde_5.50.5_nb.n3_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
# bf_tda_kde_5.50.5_nb.n3_3_fold
# One-sample t-test of the fold differences (default null: mean equal to 0).
t.test(x = as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold)
## t = -1.2847, df = 2, p-value = 0.3276
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02874895 0.01552839
## sample estimates:
## mean of x
## -0.00661028
### Test set diff
# Difference in overall test-set accuracy: `svm_cf_ov_acc` minus the n3 value.
# NOTE(review): the 3-fold comparison for this node subtracts from
# `ad_nb_fit_re`, but this one subtracts from `svm_cf_ov_acc`; by its name that
# looks like an SVM result rather than a naive Bayes one — confirm the baseline.
diff_tda_kde_5.50.5_nb.n3_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n3_test
## Accuracy
## 0.09142097
## Bayesian Tests Test set diff
# NOTE(review): the difference is a single value, so every test below is run
# on one observation.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n3_test_odds.left<-bst_tda_kde_5.50.5_nb.n3_test$probLeft/bst_tda_kde_5.50.5_nb.n3_test$probRight
bst_tda_kde_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1598667
##
## $winRight
## [1] 0.8401333
# Bayesian Correlated Test
# All three components are NA in the recorded output, presumably because a
# spread cannot be estimated from one value.
bct_tda_kde_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n3_test))
#BayesFactor
#bf_tda_kde_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n3_test)) #bf_tda_kde_5.50.5_nb.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n3_test))
##Node4
# Fit a naive Bayes classifier ("nb") on `tda.m_adult_5.50.5.n4.vec`, resampled
# as configured in `fitControl` and tuned for accuracy.
# NOTE(review): the recorded warnings below show the usekernel = FALSE
# candidate failing in every fold (zero-variance predictors), so only the
# usekernel = TRUE candidate is ever scored.
Adult_TDA_KDE_5.50.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted model's resampling summary.
# NOTE(review): the recorded output reports Kappa = 0 for the selected
# candidate — check whether the model predicts a single class only.
Adult_TDA_KDE_5.50.5_n4_NbFit0
## Naive Bayes
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11134, 11133, 11133
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9449102 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold resampling results of the selected n4 model.
Adult_TDA_KDE_5.50.5_n4_NbFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9450234 0 Fold1
## 2 0.9448536 0 Fold2
## 3 0.9448536 0 Fold3
# Keep the per-fold accuracies as a one-column data frame, picked by name.
ad_tda_kde_5.50.5_n4_nb_fit_re <-
  Adult_TDA_KDE_5.50.5_n4_NbFit0[["resample"]]["Accuracy"]
# Structural summary of the fitted object.
summary(Adult_TDA_KDE_5.50.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the rows of `adult.one_hot_df4Test` with the n4 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_kde_5.50.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the recorded output
# is identical to the print directly above.
ad_tda_kde_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics stored in the n4 confusion-matrix object.
ad_tda_kde_5.50.5_n4_nb_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the overall accuracy, selected by name instead of by position.
ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_nb_cf0[["overall"]]["Accuracy"]
# Per-class statistics from the same object.
ad_tda_kde_5.50.5_n4_nb_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1, selected by name instead of by position.
ad_tda_kde_5.50.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_nb_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: `ad_nb_fit_re` minus the n4 TDA/KDE fit, so
# positive values favour `ad_nb_fit_re` (presumably the non-TDA naive Bayes
# baseline — TODO confirm).
diff_tda_kde_5.50.5_nb_n4_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n4_nb_fit_re)
diff_tda_kde_5.50.5_nb_n4_3_fold
## Accuracy
## 1 -0.1847232
## 2 -0.1697812
## 3 -0.1857065
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# -0.01 and 0.01 are the second and third arguments, presumably the ROPE
# bounds (the result carries a `probRope` component).
bst_tda_kde_5.50.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; probRight is 0 here, hence the recorded Inf.
bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n4_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n4_3_fold$probRight
bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# NOTE(review): the recorded values look sampled (presumably Monte Carlo), so a
# re-run may not reproduce these digits exactly — TODO confirm.
bsr_tda_kde_5.50.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9916
##
## $winRope
## [1] 0.0084
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold accuracy differences.
# The second argument is the correlation between cross-validation results,
# which the correlated t-test sets to 1 / (number of folds). These differences
# come from 3-fold resampling, so 1 / 3 is used instead of 0.1 (the value that
# corresponds to 10 folds).
# NOTE(review): the recorded output that follows was produced with 0.1 and has
# to be regenerated by re-knitting.
bct_tda_kde_5.50.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9993892
##
## $rope
## [1] 0.0001215921
##
## $right
## [1] 0.000489166
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_nb_n4_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (disabled):
# bf_tda_kde_5.50.5_nb.n4_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
# bf_tda_kde_5.50.5_nb.n4_3_fold
# One-sample t-test of the fold differences (default null: mean equal to 0).
t.test(x = as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold)
## t = -34.949, df = 2, p-value = 0.0008177
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2022392 -0.1579015
## sample estimates:
## mean of x
## -0.1800703
### Test set diff
# Difference in overall test-set accuracy: `svm_cf_ov_acc` minus the n4 value.
# NOTE(review): the 3-fold comparison for this node subtracts from
# `ad_nb_fit_re`, but this one subtracts from `svm_cf_ov_acc`; by its name that
# looks like an SVM result rather than a naive Bayes one — confirm the baseline.
diff_tda_kde_5.50.5_nb.n4_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n4_test
## Accuracy
## 0.09183047
## Bayesian Tests Test set diff
# NOTE(review): the difference is a single value, so every test below is run
# on one observation.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n4_test_odds.left<-bst_tda_kde_5.50.5_nb.n4_test$probLeft/bst_tda_kde_5.50.5_nb.n4_test$probRight
bst_tda_kde_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1612
##
## $winRight
## [1] 0.8388
# Bayesian Correlated Test
# All three components are NA in the recorded output, presumably because a
# spread cannot be estimated from one value.
bct_tda_kde_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n4_test))
#BayesFactor
#bf_tda_kde_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n4_test)) #bf_tda_kde_5.50.5_nb.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n4_test))
##Node5
# Fit a naive Bayes classifier ("nb") on `tda.m_adult_5.50.5.n5.vec`, resampled
# as configured in `fitControl` and tuned for accuracy.
# NOTE(review): the recorded warnings below show the usekernel = FALSE
# candidate failing in every fold (zero-variance predictors), so only the
# usekernel = TRUE candidate is ever scored.
Adult_TDA_KDE_5.50.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted model's resampling summary.
# NOTE(review): the recorded output reports Kappa = 0 for the selected
# candidate — check whether the model predicts a single class only.
Adult_TDA_KDE_5.50.5_n5_NbFit0
## Naive Bayes
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9604, 9602, 9602
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9979867 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold resampling results of the selected n5 model.
Adult_TDA_KDE_5.50.5_n5_NbFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9981250 0 Fold1
## 2 0.9979175 0 Fold2
## 3 0.9979175 0 Fold3
# Keep the per-fold accuracies as a one-column data frame, picked by name.
ad_tda_kde_5.50.5_n5_nb_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_NbFit0[["resample"]]["Accuracy"]
# Structural summary of the fitted object.
summary(Adult_TDA_KDE_5.50.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the rows of `adult.one_hot_df4Test` with the n5 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_kde_5.50.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the recorded output
# is identical to the print directly above.
ad_tda_kde_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics stored in the n5 confusion-matrix object.
ad_tda_kde_5.50.5_n5_nb_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the overall accuracy, selected by name instead of by position.
ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_nb_cf0[["overall"]]["Accuracy"]
# Per-class statistics from the same object.
ad_tda_kde_5.50.5_n5_nb_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1, selected by name instead of by position.
ad_tda_kde_5.50.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_nb_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: `ad_nb_fit_re` minus the n5 TDA/KDE fit, so
# positive values favour `ad_nb_fit_re` (presumably the non-TDA naive Bayes
# baseline — TODO confirm).
diff_tda_kde_5.50.5_nb_n5_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n5_nb_fit_re)
diff_tda_kde_5.50.5_nb_n5_3_fold
## Accuracy
## 1 -0.2378249
## 2 -0.2228451
## 3 -0.2387704
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# -0.01 and 0.01 are the second and third arguments, presumably the ROPE
# bounds (the result carries a `probRope` component).
bst_tda_kde_5.50.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; probRight is 0 here, hence the recorded Inf.
bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n5_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n5_3_fold$probRight
bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# NOTE(review): the recorded values look sampled (presumably Monte Carlo), so a
# re-run may not reproduce these digits exactly — TODO confirm.
bsr_tda_kde_5.50.5_nb.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9906667
##
## $winRope
## [1] 0.009333333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold accuracy differences.
# The second argument is the correlation between cross-validation results,
# which the correlated t-test sets to 1 / (number of folds). These differences
# come from 3-fold resampling, so 1 / 3 is used instead of 0.1 (the value that
# corresponds to 10 folds).
# NOTE(review): the recorded output that follows was produced with 0.1 and has
# to be regenerated by re-knitting.
bct_tda_kde_5.50.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9996442
##
## $rope
## [1] 5.607864e-05
##
## $right
## [1] 0.0002997458
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_nb_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (disabled):
# bf_tda_kde_5.50.5_nb.n5_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
# bf_tda_kde_5.50.5_nb.n5_3_fold
# One-sample t-test of the fold differences (default null: mean equal to 0).
t.test(x = as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold)
## t = -45.201, df = 2, p-value = 0.0004891
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2553401 -0.2109535
## sample estimates:
## mean of x
## -0.2331468
### Test set diff
# Difference in overall test-set accuracy: `svm_cf_ov_acc` minus the n5 value.
# NOTE(review): the 3-fold comparison for this node subtracts from
# `ad_nb_fit_re`, but this one subtracts from `svm_cf_ov_acc`; by its name that
# looks like an SVM result rather than a naive Bayes one — confirm the baseline.
diff_tda_kde_5.50.5_nb.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n5_test
## Accuracy
## 0.09183047
## Bayesian Tests Test set diff
# NOTE(review): the difference is a single value, so every test below is run
# on one observation.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n5_test_odds.left<-bst_tda_kde_5.50.5_nb.n5_test$probLeft/bst_tda_kde_5.50.5_nb.n5_test$probRight
bst_tda_kde_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1561667
##
## $winRight
## [1] 0.8438333
# Bayesian Correlated Test
# All three components are NA in the recorded output, presumably because a
# spread cannot be estimated from one value.
bct_tda_kde_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n5_test))
#BayesFactor
#bf_tda_kde_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n5_test)) #bf_tda_kde_5.50.5_nb.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n5_test))